diff --git a/.gitattributes b/.gitattributes index cc09dcb315e618ca061005ac6cab8c4251372fbf..40f4b2f665e7e3df95b4cb4e537e0feda6e9de95 100644 --- a/.gitattributes +++ b/.gitattributes @@ -145,3 +145,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_ .venv/lib/python3.11/site-packages/torch/_inductor/codegen/__pycache__/cpp_wrapper_cpu.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/torch/_inductor/codegen/__pycache__/cpp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/torch/_inductor/codegen/__pycache__/common.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/torch/_inductor/fx_passes/__pycache__/split_cat.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/torch/_inductor/fx_passes/__pycache__/split_cat.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torch/_inductor/fx_passes/__pycache__/split_cat.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfd192afb8fcc8c0a2b63773256b70abcd827e35 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/_inductor/fx_passes/__pycache__/split_cat.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6d007391b31b1b010874c0dfd08680792c26a7542040197359230b54bc6612 +size 114085 diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/AccumulateType.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/AccumulateType.h new file mode 100644 index 0000000000000000000000000000000000000000..b1f120e48176c583b56e8f6f75b75069b16b4798 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/AccumulateType.h @@ -0,0 +1,173 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Defines the accumulation type for a scalar type. +// Example: +// using accscalar_t = acc_type; +// +// Accumulation types are an important concept in numeric computing +// because you frequently want to perform intermediate computations +// at a higher precision than the input and output precision, to avoid +// compounding internal rounding errors. Accumulation is the most +// well-known intermediate computation (it is of great importance for +// sum reduction and matrix multiply, for example), but in PyTorch +// acc_type ends up getting used for all sorts of other intermediate +// computations, so it perhaps would be more accurately (ahem) called an +// "accurate" type. acc_type is especially important for reduced +// precision operations like float16 and bfloat16, where relatively +// benign looking inputs can easily end up overflowing/underflowing. +// +// acc_type is parametrized by whether or not you are running on CUDA +// or not, because on CUDA double precision operations are expensive +// and so by default, we don't actually want to use double as an +// acc_type on CUDA. A lot of things are typed out below, but +// basically, the table is generated by a few rules: +// +// If bool: +// Use 'bool' as acc_type. +// If floating point: +// If CUDA, use 'float' as acc_type (unless scalar_t is double), +// otherwise (CPU) use 'double' +// If integral: +// Use 'int64_t' as acc_type +// +// You're not forced to use this template; if you happen to know +// something specific about your use case, you can specify your own +// desired behavior. 
This template, however, will give you a reasonable +// default that will work for all dtypes supported in PyTorch. + +#if defined(__CUDACC__) +#include +#include +#elif defined(__HIPCC__) +#include +#include +#endif + +namespace at { + +template +struct AccumulateTypeDevice {}; + +template +struct AccumulateType {}; + +template +struct AccumulateType { + using type = typename AccumulateTypeDevice::type; +}; + +template +struct AccumulateType { + using type = typename AccumulateTypeDevice::type; +}; + +template +using acc_type_device = typename AccumulateTypeDevice::type; + +template +using acc_type = typename AccumulateType::type; + +#define ACC_TYPE(t, acc_t, device_type) \ + template <> \ + struct AccumulateTypeDevice { \ + using type = acc_t; \ + }; +#define MPS_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::MPS) +#define XPU_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::XPU) +#define CUDA_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::CUDA) +#define CPU_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::CPU) + +MPS_ACC_TYPE(BFloat16, float); +MPS_ACC_TYPE(Half, float); +MPS_ACC_TYPE(Float8_e5m2, float); +MPS_ACC_TYPE(Float8_e4m3fn, float); +MPS_ACC_TYPE(Float8_e5m2fnuz, float); +MPS_ACC_TYPE(Float8_e4m3fnuz, float); +MPS_ACC_TYPE(float, float); +MPS_ACC_TYPE(double, float); +MPS_ACC_TYPE(int8_t, int64_t); +MPS_ACC_TYPE(uint8_t, int64_t); +MPS_ACC_TYPE(char, int64_t); +MPS_ACC_TYPE(int16_t, int64_t); +MPS_ACC_TYPE(int32_t, int64_t); +MPS_ACC_TYPE(int64_t, int64_t); +MPS_ACC_TYPE(bool, bool); +MPS_ACC_TYPE(c10::complex, c10::complex); +MPS_ACC_TYPE(c10::complex, c10::complex); +MPS_ACC_TYPE(c10::complex, c10::complex); + +XPU_ACC_TYPE(BFloat16, float); +XPU_ACC_TYPE(Half, float); +XPU_ACC_TYPE(Float8_e5m2, float); +XPU_ACC_TYPE(Float8_e4m3fn, float); +XPU_ACC_TYPE(Float8_e5m2fnuz, float); +XPU_ACC_TYPE(Float8_e4m3fnuz, float); +XPU_ACC_TYPE(float, float); +XPU_ACC_TYPE(double, double); +XPU_ACC_TYPE(int8_t, int64_t); +XPU_ACC_TYPE(uint8_t, int64_t); +XPU_ACC_TYPE(char, int64_t); +XPU_ACC_TYPE(int16_t, int64_t); +XPU_ACC_TYPE(int32_t, int64_t); +XPU_ACC_TYPE(int64_t, int64_t); +XPU_ACC_TYPE(bool, bool); +XPU_ACC_TYPE(c10::complex, c10::complex); +XPU_ACC_TYPE(c10::complex, c10::complex); +XPU_ACC_TYPE(c10::complex, c10::complex); + +#if defined(__CUDACC__) || defined(__HIPCC__) +CUDA_ACC_TYPE(half, float); +#endif +CUDA_ACC_TYPE(BFloat16, float); +CUDA_ACC_TYPE(Half, float); +CUDA_ACC_TYPE(Float8_e5m2, float); +CUDA_ACC_TYPE(Float8_e4m3fn, float); +CUDA_ACC_TYPE(Float8_e5m2fnuz, float); +CUDA_ACC_TYPE(Float8_e4m3fnuz, float); +CUDA_ACC_TYPE(float, float); +CUDA_ACC_TYPE(double, double); +CUDA_ACC_TYPE(int8_t, int64_t); +CUDA_ACC_TYPE(uint8_t, int64_t); +CUDA_ACC_TYPE(char, int64_t); +CUDA_ACC_TYPE(int16_t, int64_t); +CUDA_ACC_TYPE(int32_t, int64_t); +CUDA_ACC_TYPE(int64_t, int64_t); +CUDA_ACC_TYPE(bool, bool); +CUDA_ACC_TYPE(c10::complex, c10::complex); +CUDA_ACC_TYPE(c10::complex, c10::complex); +CUDA_ACC_TYPE(c10::complex, c10::complex); + +CPU_ACC_TYPE(BFloat16, float); +CPU_ACC_TYPE(Half, float); +CPU_ACC_TYPE(Float8_e5m2, float); +CPU_ACC_TYPE(Float8_e4m3fn, float); +CPU_ACC_TYPE(Float8_e5m2fnuz, float); +CPU_ACC_TYPE(Float8_e4m3fnuz, float); +CPU_ACC_TYPE(float, double); +CPU_ACC_TYPE(double, double); +CPU_ACC_TYPE(int8_t, int64_t); +CPU_ACC_TYPE(uint8_t, int64_t); +CPU_ACC_TYPE(char, int64_t); +CPU_ACC_TYPE(int16_t, int64_t); +CPU_ACC_TYPE(int32_t, int64_t); +CPU_ACC_TYPE(int64_t, int64_t); +CPU_ACC_TYPE(bool, bool); +CPU_ACC_TYPE(c10::complex, 
c10::complex); +CPU_ACC_TYPE(c10::complex, c10::complex); +CPU_ACC_TYPE(c10::complex, c10::complex); + +TORCH_API c10::ScalarType toAccumulateType( + c10::ScalarType type, + c10::DeviceType device); +TORCH_API c10::ScalarType toAccumulateType(c10::ScalarType type, bool is_cuda); + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Backend.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Backend.h new file mode 100644 index 0000000000000000000000000000000000000000..9651469e190085d913ba9b5d1ca02085886fc4e1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Backend.h @@ -0,0 +1,2 @@ +#pragma once +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUApplyUtils.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUApplyUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..5c524ef97c475a0529b7b18c430be0d39c350aa4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUApplyUtils.h @@ -0,0 +1,343 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace at { + +/* + * The basic strategy for apply is as follows: + * + * 1. Starting with the outermost index, loop until we reach a dimension where + * the data is no longer contiguous, i.e. the stride at that dimension is not + * equal to the size of the tensor defined by the outer dimensions. Let's call + * this outer (contiguous) tensor A. Note that if the Tensor is contiguous, then + * A is equal to the entire Tensor. Let's call the inner tensor B. + * + * 2. We loop through the indices in B, starting at its outermost dimension. For + * example, if B is a 2x2 matrix, then we do: + * + * B[0][0] + * B[0][1] + * B[1][0] + * B[1][1] + * + * We set the offset into the underlying storage as (storageOffset + stride_B * + * index_B), i.e. basically we compute the offset into the storage as we would + * normally for a Tensor. But because we are guaranteed the subsequent data is + * contiguous in memory, we can simply loop for sizeof(A) iterations and perform + * the operation, without having to follow the order described by the strides of + * A. + * + * 3. As an optimization, we merge dimensions of A that are contiguous in + * memory. For example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, + * then the first two dimensions can be merged for the purposes of APPLY, + * reducing the number of nested loops. 
+ */ + +inline Tensor sort_strides(Tensor& tensor_) { + IntArrayRef strides = tensor_.strides(); + std::vector indices; + indices.reserve(tensor_.ndimension()); + for (const auto i : c10::irange(tensor_.ndimension())) { + indices.push_back(i); + } + std::sort(indices.begin(), indices.end(), [&strides](int64_t i1, int64_t i2) { + return strides[i1] > strides[i2]; + }); + Tensor tensor = tensor_.permute(indices); + return tensor; +} + +template +struct strided_tensor_iter_fixed { + public: + T* data_ = NULL; + int64_t dim_ = 0; + + int64_t counter_[N] = {0}; + int64_t sizes_[N] = {0}; + int64_t strides_[N] = {0}; + + strided_tensor_iter_fixed(strided_tensor_iter_fixed const&) = delete; + void operator=(strided_tensor_iter_fixed const& x) = delete; + strided_tensor_iter_fixed(strided_tensor_iter_fixed&&) = default; + strided_tensor_iter_fixed( + Tensor& tensor, + C10_UNUSED bool sort_strides = false) + : data_(tensor.data_ptr()) { + std::memset(counter_, 0, sizeof(int64_t) * N); + if (tensor.dim() > 0) { + std::memcpy( + sizes_, tensor.sizes().data(), tensor.dim() * sizeof(int64_t)); + std::memcpy( + strides_, tensor.strides().data(), tensor.dim() * sizeof(int64_t)); + } + dim_ = std::get<1>(collapse_dims(sizes_, strides_, tensor.ndimension())); + } +}; + +template +struct strided_tensor_iter { + private: + public: + T* data_ = NULL; + int64_t dim_; + + std::vector counter_; + std::vector sizes_; + std::vector strides_; + + strided_tensor_iter(strided_tensor_iter const&) = delete; + void operator=(strided_tensor_iter const& x) = delete; + strided_tensor_iter(strided_tensor_iter&&) = default; + strided_tensor_iter(Tensor& tensor) + : data_(tensor.data_ptr()), + dim_(tensor.ndimension()), + counter_(dim_, 0), + sizes_(tensor.sizes().vec()), + strides_(tensor.strides().vec()) { + dim_ = std::get<1>(collapse_dims(sizes_.data(), strides_.data(), dim_)); + } +}; + +inline bool _all_equal_numel(at::ArrayRef tensors) { + if (tensors.empty()) + return true; + int64_t all_numel = tensors[0].numel(); + for (const auto i : c10::irange(1, tensors.size())) { + if (tensors[i].numel() != all_numel) + return false; + } + return true; +} + +inline std::string _all_equal_numel_error(at::ArrayRef tensors) { + std::ostringstream oss; + oss << "inconsistent tensor size, expected "; + for (size_t i = 0; i < tensors.size() - 1; i++) { + oss << tensors[i].sizes() << ", "; + } + oss << "and " << tensors[tensors.size() - 1].sizes() + << " to have the same number of elements, but got "; + for (size_t i = 0; i < tensors.size() - 1; i++) { + oss << tensors[i].numel() << ", "; + } + oss << "and " << tensors[tensors.size() - 1].numel() + << " elements respectively"; + return oss.str(); +} + +inline bool _apply_preamble(ArrayRef tensors) { + checkDeviceType("CPU_tensor_apply", tensors, kCPU); + checkLayout("CPU_tensor_apply", tensors, kStrided); + if (!_all_equal_numel(tensors)) + AT_ERROR(_all_equal_numel_error(tensors)); + // An empty tensor has no elements + for (auto& t : tensors) + if (t.numel() == 0) + return false; + return true; +} + +inline int64_t _max_dim_tensors(ArrayRef tensors) { + int64_t dim = 0; + for (auto& t : tensors) + dim = std::max(dim, t.ndimension()); + return dim; +} + +inline void iterate(int64_t /*size*/){}; + +template +inline void iterate(int64_t size, Arg& iter, Args&... 
iter_tail) { + iter.counter_[iter.dim_ - 1] += size; + iter.data_ = iter.data_ + size * iter.strides_[iter.dim_ - 1]; + iterate(size, iter_tail...); +} + +inline bool iterate_continue() { + return true; +}; + +template +inline bool iterate_continue(Arg& iter, Args&... iter_tail) { + return iter.counter_[iter.dim_ - 1] < iter.sizes_[iter.dim_ - 1] && + iterate_continue(iter_tail...); +} + +inline int64_t max_iterate_size() { + return std::numeric_limits::max(); +}; + +template +inline int64_t max_iterate_size(Arg& iter, Args&... iter_tail) { + return std::min( + (iter.sizes_[iter.dim_ - 1] - iter.counter_[iter.dim_ - 1]), + max_iterate_size(iter_tail...)); +} + +inline void iterate_overflow(){}; + +template +inline void iterate_overflow(Arg& iter, Args&... iter_tail) { + if (iter.counter_[iter.dim_ - 1] == iter.sizes_[iter.dim_ - 1]) { + for (int64_t i = iter.dim_ - 1; i > 0; i--) { + if (iter.counter_[i] == iter.sizes_[i]) { + iter.counter_[i] = 0; + iter.counter_[i - 1]++; + iter.data_ = iter.data_ - (iter.sizes_[i] * iter.strides_[i]) + + iter.strides_[i - 1]; + } + } + } + iterate_overflow(iter_tail...); +} + +inline void forward(int64_t /*offset*/){}; + +template +inline void forward(int64_t offset, Arg& iter, Args&... iter_tail) { + int64_t multi = offset; + for (int64_t i = iter.dim_ - 1; i >= 0; i--) { + int64_t inc = multi % iter.sizes_[i]; + multi = multi / iter.sizes_[i]; + iter.data_ = iter.data_ + inc * iter.strides_[i]; + iter.counter_[i] += inc; + } + forward(offset, iter_tail...); +} + +inline int64_t max_dim() { + return 0; +} + +template +inline int64_t max_dim(Arg& iter, Args&... iter_tail) { + return std::max(iter.dim_, max_dim(iter_tail...)); +} + +inline void apply_op(){}; + +template +inline void apply_op( + int64_t numel, + int64_t offset, + const Op& op, + Args... iters) { + // For 0-dim tensors + if (numel == 1 && max_dim(iters...) == 0) { + op(*iters.data_...); + return; + } + if (offset > 0) + forward(offset, iters...); + // Splitting this into chunks helps the compiler create faster assembly + for (int64_t i = 0; i < numel;) { + for (; iterate_continue(iters...) && i < numel;) { + op(*iters.data_...); + iterate(1, iters...); + i++; + } + iterate_overflow(iters...); + } +} + +/* + Apply a pointwise operator to sequence of tensors + + The calling convention for op is a function/functor that takes the same + number of pointers of type scalar as the number of given tensors. 
For example, + to compute a = b * c, op would be of the form: + [](scalar* a_val, const scalar* b_val, const scalar* c_val) { a_val[0] = + b_val[0] * c_val[0]; }; +*/ + +template +inline void CPU_tensor_apply2(Tensor tensor1, Tensor tensor2, const Op op) { + if (!_apply_preamble({tensor1, tensor2})) + return; + if (_max_dim_tensors({tensor1, tensor2}) <= 8) { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter_fixed(tensor1), + strided_tensor_iter_fixed(tensor2)); + } else { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter(tensor1), + strided_tensor_iter(tensor2)); + } +} + +template +inline void CPU_tensor_apply3( + Tensor tensor1, + Tensor tensor2, + Tensor tensor3, + const Op op) { + if (!_apply_preamble({tensor1, tensor2, tensor3})) + return; + if (_max_dim_tensors({tensor1, tensor2, tensor3}) <= 8) { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter_fixed(tensor1), + strided_tensor_iter_fixed(tensor2), + strided_tensor_iter_fixed(tensor3)); + } else { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter(tensor1), + strided_tensor_iter(tensor2), + strided_tensor_iter(tensor3)); + } +} + +template < + typename scalar1, + typename scalar2, + typename scalar3, + typename scalar4, + typename Op> +inline void CPU_tensor_apply4( + Tensor tensor1, + Tensor tensor2, + Tensor tensor3, + Tensor tensor4, + const Op op) { + if (!_apply_preamble({tensor1, tensor2, tensor3, tensor4})) + return; + if (_max_dim_tensors({tensor1, tensor2, tensor3, tensor4}) <= 8) { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter_fixed(tensor1), + strided_tensor_iter_fixed(tensor2), + strided_tensor_iter_fixed(tensor3), + strided_tensor_iter_fixed(tensor4)); + } else { + apply_op( + tensor1.numel(), + 0, + op, + strided_tensor_iter(tensor1), + strided_tensor_iter(tensor2), + strided_tensor_iter(tensor3), + strided_tensor_iter(tensor4)); + } +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFixedAllocator.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFixedAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..cf621f34cc63735d7f7557f48146bb76467b8afc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFixedAllocator.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +// This file creates a fake allocator that just throws exceptions if +// it is actually used. 
+ +// state passed to the allocator is the std::function called +// when the blob is release by ATen + +namespace at { + +static cpu_fixed_malloc(void*, ptrdiff_t) { + AT_ERROR("attempting to resize a tensor view of an external blob"); +} + +static cpu_fixed_realloc(void*, void*, ptrdiff_t) { + AT_ERROR("attempting to resize a tensor view of an external blob"); +} + +static cpu_fixed_free(void* state, void* allocation) { + auto on_release = static_cast*>(state); + (*on_release)(allocation); + delete on_release; +} + +static Allocator CPU_fixed_allocator = { + cpu_fixed_malloc, + cpu_fixed_realloc, + cpu_fixed_free}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFunctions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..b1f3044dec0537027c3474f1ed5a49a446604ded --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUFunctions.h @@ -0,0 +1,29 @@ +#include + +// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch] +// Code introduced to avoid cyclic dependency in static dispatch is no longer +// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place, +// to Operators.cpp for supporting multiple backends with multiple kernels. +// +// Note [Avoiding Include Cycles In Static Dispatch] +// In order to avoid #include cycles in the static dispatch build, we've carefully split out +// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h. +// +// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h. +// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods +// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all +// directly inlined into TensorBody.h. +// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API, +// which include functions that have defaultable std::optional arguments. +// That requires knowing the full Tensor class definition. +// +// We break the cycle by doing the following: +// - Split out CPUFunction.h into two files: CPUFunctions.h and CPUFunctions_inl.h +// - CPUFunction.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl., +// - CPUFunctions_inl.h includes everything else +// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class, +// and then it includes CPUFunctions_inl.h. +// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly. +// - This also means that static dispatch build, CPUFunctions.h only needs to +// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h. 
+#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUGeneratorImpl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUGeneratorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..e15ca23d6bf7486db9616467e44a69e099de9061 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CPUGeneratorImpl.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +namespace at { + +struct TORCH_API CPUGeneratorImpl : public c10::GeneratorImpl { + // Constructors + CPUGeneratorImpl(uint64_t seed_in = default_rng_seed_val); + ~CPUGeneratorImpl() override = default; + + // CPUGeneratorImpl methods + std::shared_ptr clone() const; + void set_current_seed(uint64_t seed) override; + void set_offset(uint64_t offset) override; + uint64_t get_offset() const override; + uint64_t current_seed() const override; + uint64_t seed() override; + void set_state(const c10::TensorImpl& new_state) override; + c10::intrusive_ptr get_state() const override; + static c10::DeviceType device_type(); + uint32_t random(); + uint64_t random64(); + std::optional next_float_normal_sample(); + std::optional next_double_normal_sample(); + void set_next_float_normal_sample(std::optional randn); + void set_next_double_normal_sample(std::optional randn); + at::mt19937 engine(); + void set_engine(at::mt19937 engine); + + private: + CPUGeneratorImpl* clone_impl() const override; + at::mt19937 engine_; + std::optional next_float_normal_sample_; + std::optional next_double_normal_sample_; +}; + +namespace detail { + +TORCH_API const Generator& getDefaultCPUGenerator(); +TORCH_API Generator +createCPUGenerator(uint64_t seed_val = default_rng_seed_val); + +} // namespace detail + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeExplicitAutogradFunctions_inl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeExplicitAutogradFunctions_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..1d92cd2f68c8b913dc3cd7e620ad7a4a9c125d39 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeExplicitAutogradFunctions_inl.h @@ -0,0 +1,553 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunctions_inl.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS) +#error This change adds a dependency on all pytorch operators, meaning the \ + file will need to be re-compiled every time an operator is changed or added. \ + Consider including a specific operator from \ + . \ + See NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS]. 
+#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradFunctions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..51bc47dd4f55455d8009a26f33e76632852ab74f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradFunctions.h @@ -0,0 +1,29 @@ +#include + +// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch] +// Code introduced to avoid cyclic dependency in static dispatch is no longer +// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place, +// to Operators.cpp for supporting multiple backends with multiple kernels. +// +// Note [Avoiding Include Cycles In Static Dispatch] +// In order to avoid #include cycles in the static dispatch build, we've carefully split out +// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h. +// +// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h. +// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods +// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all +// directly inlined into TensorBody.h. +// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API, +// which include functions that have defaultable std::optional arguments. +// That requires knowing the full Tensor class definition. 
+// +// We break the cycle by doing the following: +// - Split out CPUFunction.h into two files: CPUFunctions.h and CPUFunctions_inl.h +// - CPUFunction.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl., +// - CPUFunctions_inl.h includes everything else +// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class, +// and then it includes CPUFunctions_inl.h. +// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly. +// - This also means that static dispatch build, CPUFunctions.h only needs to +// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h. +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..4278ea0820295f0285b30359a5b26e6100d257ef --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions.h @@ -0,0 +1,29 @@ +#include + +// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch] +// Code introduced to avoid cyclic dependency in static dispatch is no longer +// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place, +// to Operators.cpp for supporting multiple backends with multiple kernels. +// +// Note [Avoiding Include Cycles In Static Dispatch] +// In order to avoid #include cycles in the static dispatch build, we've carefully split out +// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h. +// +// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h. +// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods +// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all +// directly inlined into TensorBody.h. +// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API, +// which include functions that have defaultable std::optional arguments. +// That requires knowing the full Tensor class definition. +// +// We break the cycle by doing the following: +// - Split out CPUFunction.h into two files: CPUFunctions.h and CPUFunctions_inl.h +// - CPUFunction.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl., +// - CPUFunctions_inl.h includes everything else +// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class, +// and then it includes CPUFunctions_inl.h. +// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly. +// - This also means that static dispatch build, CPUFunctions.h only needs to +// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h. 
+#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..90ffa6b1eb4a9cc5d64851784113a739e385a77e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunctions_inl.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS) +#error This change adds a dependency on all pytorch operators, meaning the \ + file will need to be re-compiled every time an operator is changed or added. \ + Consider including a specific operator from \ + . \ + See NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS]. +#endif + +#include +#include +#include +#include + + + diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h new file mode 100644 index 0000000000000000000000000000000000000000..b7a964bb4e403711ff9c642d95721d9c7a99b274 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h @@ -0,0 +1,21 @@ +#pragma once + +// Test these using #if AT_MKL_ENABLED(), not #ifdef, so that it's +// obvious if you forgot to include Config.h +// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined +// +// DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h + +#define AT_MKLDNN_ENABLED() 1 +#define AT_MKLDNN_ACL_ENABLED() 0 +#define AT_MKL_ENABLED() 1 +#define AT_MKL_SEQUENTIAL() 0 +#define AT_POCKETFFT_ENABLED() 0 +#define AT_NNPACK_ENABLED() 1 +#define CAFFE2_STATIC_LINK_CUDA() 0 +#define AT_BUILD_WITH_BLAS() 1 +#define AT_BUILD_WITH_LAPACK() 1 +#define AT_PARALLEL_OPENMP 1 +#define AT_PARALLEL_NATIVE 0 +#define AT_BLAS_F2C() 0 +#define AT_BLAS_USE_CBLAS_DOT() 0 diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Context.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Context.h new file mode 100644 index 0000000000000000000000000000000000000000..51180095953e932cf95e928bfcfb8b32b4882204 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Context.h @@ -0,0 +1,610 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace at { + +class Tensor; + +enum class TORCH_API Float32MatmulPrecision { HIGHEST, HIGH, MEDIUM }; + +class TORCH_API Context { + public: + Context(); + + const Generator& defaultGenerator(Device device) { + c10::DeviceType device_type = device.type(); + initCUDAIfNeeded(device_type); + initHIPIfNeeded(device_type); + if (device_type == at::kCPU) { + return at::detail::getDefaultCPUGenerator(); + } else if (device_type == at::kCUDA) { + return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index()); + } else if (device_type == at::kMPS) { + return at::detail::getMPSHooks().getDefaultMPSGenerator(); + } else if (device_type == at::kXPU) 
{ + return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index()); + } else if (device_type == at::kIPU) { + return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index()); + } else if (device_type == at::kPrivateUse1) { + return at::detail::getPrivateUse1Hooks().getDefaultGenerator( + device.index()); + } else { + AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled."); + } + } + const AcceleratorHooksInterface& getAcceleratorHooksInterface( + std::optional opt_device_type = std::nullopt) { + c10::DeviceType device_type = opt_device_type.has_value() + ? opt_device_type.value() + : at::getAccelerator(true).value(); + if (device_type == at::kCUDA) { + return at::detail::getCUDAHooks(); + } else if (device_type == at::kXPU) { + return at::detail::getXPUHooks(); + } else if (device_type == at::kMPS) { + return at::detail::getMPSHooks(); + } else if (device_type == at::kPrivateUse1) { + return at::detail::getPrivateUse1Hooks(); + } else if (device_type == at::kMTIA) { + return at::detail::getMTIAHooks(); + } else if (device_type == at::kHIP) { + return at::detail::getHIPHooks(); + } else { + AT_ERROR( + c10::DeviceTypeName(device_type), " device type not an accelerator."); + } + } + Device getDeviceFromPtr(void* data, c10::DeviceType device_type) { + initCUDAIfNeeded(device_type); + initHIPIfNeeded(device_type); + initXPUIfNeeded(device_type); + if (device_type == at::kCPU) { + return c10::DeviceType::CPU; + } else if (device_type == at::kCUDA) { + return at::detail::getCUDAHooks().getDeviceFromPtr(data); + } else if (device_type == at::kXPU) { + return at::detail::getXPUHooks().getDeviceFromPtr(data); + } else if (device_type == at::kPrivateUse1) { + return at::detail::getPrivateUse1Hooks().getDeviceFromPtr(data); + } else { + AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled."); + } + } + bool isPinnedPtr( + const void* data, + std::optional device_type = std::nullopt) { + auto opt_device_type = + device_type.has_value() ? 
device_type : at::getAccelerator(); + if (!opt_device_type.has_value() || // there is no accelerator + !at::isAccelerator( + opt_device_type.value())) { // passed device not an accelerator + return false; + } + return getAcceleratorHooksInterface(opt_device_type.value()) + .isPinnedPtr(data); + } + Allocator* getPinnedMemoryAllocator( + std::optional device_type = std::nullopt) { + return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator(); + } + static bool hasOpenMP(); + static bool hasMKL(); + static bool hasLAPACK(); + static bool hasMKLDNN(); + static bool hasMAGMA() { + return detail::getCUDAHooks().hasMAGMA(); + } + static bool hasCUDA() { + return detail::getCUDAHooks().hasCUDA(); + } + static bool hasMTIA() { + return detail::getMTIAHooks().hasMTIA(); + } + static bool hasCUDART() { + return detail::getCUDAHooks().hasCUDART(); + } + static long versionCUDART() { + return detail::getCUDAHooks().versionCUDART(); + } + static bool hasCuDNN() { + return detail::getCUDAHooks().hasCuDNN(); + } + static long versionCuDNN() { + return detail::getCUDAHooks().versionCuDNN(); + } + static bool hasCuSOLVER() { + return detail::getCUDAHooks().hasCuSOLVER(); + } + static bool hasCuBLASLt() { + return detail::getCUDAHooks().hasCuBLASLt(); + } + static bool hasHIP() { + return detail::getHIPHooks().hasHIP(); + } + static bool hasMPS() { + return detail::getMPSHooks().hasMPS(); + } + static bool hasIPU() { + return c10::impl::hasDeviceGuardImpl(c10::DeviceType::IPU); + } + static bool hasXLA() { + return c10::impl::hasDeviceGuardImpl(c10::DeviceType::XLA); + } + static bool hasXPU() { + return detail::getXPUHooks().hasXPU(); + } + static bool hasLazy() { + return c10::impl::hasDeviceGuardImpl(c10::DeviceType::Lazy); + } + static bool hasMAIA() { + return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA); + } + // defined in header so that getNonVariableType has ability to inline + // call_once check. getNonVariableType is called fairly frequently + void lazyInitCUDA() { + c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); }); + } + void lazyInitHIP() { + c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); }); + } + void lazyInitXPU() { + c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); }); + } + void lazyInitMTIA() { + c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); }); + } + void lazyInitPrivateUse1() { + c10::call_once(thp_init, [&] { + if (isPrivateUse1HooksRegistered()) { + at::detail::getPrivateUse1Hooks().initPrivateUse1(); + } + }); + } + static const at::cuda::NVRTC& getNVRTC() { + return detail::getCUDAHooks().nvrtc(); + } + + static bool setFlushDenormal(bool on); + + // NB: This method is *purely* whether or not a user requested + // that CuDNN was enabled, it doesn't actually say anything about + // whether or not CuDNN is actually usable. 
Use cudnn_is_acceptable + // to test this instead + bool userEnabledCuDNN() const; + void setUserEnabledCuDNN(bool e); + bool userEnabledMkldnn() const; + void setUserEnabledMkldnn(bool e); + bool benchmarkCuDNN() const; + void setBenchmarkCuDNN(bool); + int benchmarkLimitCuDNN() const; + void setBenchmarkLimitCuDNN(int); + bool deterministicCuDNN() const; + void setDeterministicCuDNN(bool); + bool deterministicMkldnn() const; + void setDeterministicMkldnn(bool); + bool userEnabledNNPACK() const; + void setUserEnabledNNPACK(bool e); + + // Note [Disabling Fused SDP Kernels] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Flash and Memory Efficient SDP kernels are enabled by default. + // However, they can be disabled by setting + // at::globalContext().setUserEnabledFlashSDP(false) flag. + // This is useful for debugging purposes. For example, if you want to + // compare the performance of the flash SDP kernels with the unfused + // kernel, you can disable the flash SDP kernels. By disabling + // the math SDP kernel, you can force your code to use flash kernels. + // The math SDP kernel can be disabled by setting + // at::globalContext().setUserEnabledMathSDP(false) flag. + void setSDPUseFlash(bool); + bool userEnabledFlashSDP() const; + + void setSDPUseMemEfficient(bool); + bool userEnabledMemEfficientSDP() const; + + void setSDPUseMath(bool); + bool userEnabledMathSDP() const; + + void setSDPUseCuDNN(bool); + bool userEnabledCuDNNSDP() const; + + void setAllowFP16BF16ReductionMathSDP(bool); + bool allowFP16BF16ReductionMathSDP() const; + + void setSDPUseOverrideable(bool); + bool userEnabledOverrideableSDP() const; + + at::LinalgBackend linalgPreferredBackend() const; + void setLinalgPreferredBackend(at::LinalgBackend); + + at::BlasBackend blasPreferredBackend(); + void setBlasPreferredBackend(at::BlasBackend); + + // Note [Enabling Deterministic Operations] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Operations in PyTorch that normally act nondeterministically, but have an + // alternate deterministic implementation, should satisfy the following + // requirements: + // + // * Include this comment: "See Note [Enabling Deterministic Operations]" + // + // * Check the value of `at::globalContext().deterministicAlgorithms()` to + // toggle + // between nondeterministic and deterministic implementations. 
+ // + // * Have an entry in the list of PyTorch operations that toggle between + // nondeterministic + // and deterministic implementations, in the docstring of + // `use_deterministic_algorithms()` in torch/__init__.py + // + // `example_func()` below shows an example of toggling between + // nondeterministic and deterministic implementations: + // + // void example_func() { + // // See Note [Enabling Deterministic Operations] + // if (at::globalContext().deterministicAlgorithms()) { + // example_func_deterministic(); + // } else { + // example_func_nondeterministic(); + // } + // } + + bool deterministicAlgorithms() const; + bool deterministicAlgorithmsWarnOnly() const; + void setDeterministicAlgorithms(bool, bool); + bool deterministicFillUninitializedMemory() const; + void setDeterministicFillUninitializedMemory(bool); + + // Note [Writing Nondeterministic Operations] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Operations in PyTorch that act nondeterministically and do not have an + // alternate deterministic implementation should satisfy the following + // requirements: + // + // * Include this comment: "See Note [Writing Nondeterministic Operations]" + // + // * Include a comment explaining why the operation is nondeterministic. + // + // * Throw an error when `Context::deterministicAlgorithms()` is true. Most + // of the time, this should be accomplished by calling + // `at::globalContext().alertNotDeterminstic()`. However, if the + // nondeterministic behavior is caused by the CuBLAS workspace + // configuration in CUDA >= 10.2, + // `at::globalContext().alertCuBLASConfigNotDeterministic()` should be + // called instead (in this case, a comment explaining why the operation is + // nondeterministic is not necessary). See below for details on these + // methods. + // + // * Have an entry in the list of nondeterministic PyTorch operations in the + // docstring of `use_deterministic_algorithms()` in torch/__init__.py + // + // * Have a test function in `test/test_torch.py` whose name begins with + // `test_nondeterministic_alert_`. Alternatively, if CuBLAS workspace + // configuration is the reason for nondeterminism, the operation should be + // included in the `test_cublas_config_nondeterministic_alert` test. Any new + // tests should ideally follow a pattern similar to the existing ones. + // + // `example_func()` below shows an example of the comments and error-throwing + // code for a nondeterministic operation: + // + // void example_func() { + // // See Note [Writing Nondeterministic Operations] + // // Nondeterministic because + // at::globalContext().alertNondeterministic("example_func"); + // ... + // } + + // Throws an error if `Context::deterministicAlgorithms()` is true + static void alertNotDeterministic(c10::string_view const& caller); + + // Throws an error if `Context::deterministicAlgorithms()` is true, CUDA + // >= 10.2, and CUBLAS_WORKSPACE_CONFIG is not set to either ":16:8" or + // ":4096:8". 
For more details: + // https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility + void alertCuBLASConfigNotDeterministic() const; + + void setFloat32MatmulPrecision(const std::string& s); + bool allowTF32CuDNN() const; + void setAllowTF32CuDNN(bool); + bool allowTF32CuBLAS() const; + void setAllowTF32CuBLAS(bool); + Float32MatmulPrecision float32MatmulPrecision() const; + void setFloat32MatmulPrecision(Float32MatmulPrecision p); + bool allowFP16ReductionCuBLAS() const; + void setAllowFP16ReductionCuBLAS(bool); + bool allowBF16ReductionCuBLAS() const; + void setAllowBF16ReductionCuBLAS(bool); + at::QEngine qEngine() const; + void setQEngine(at::QEngine e); + static const std::vector& supportedQEngines(); + static bool isXNNPACKAvailable(); + void setCheckSparseTensorInvariants(bool e); + bool checkSparseTensorInvariants() const; + // This method is used to release the original weight after pre-packing. + // It should be called once before loading/running the model. + // NB: By default it is set to true for mobile builds. + void setReleaseWeightsWhenPrepacking(bool e); + bool releaseWeightsWhenPrepacking() const; + + void setDisplayVmapFallbackWarnings(bool enabled); + bool areVmapFallbackWarningsEnabled() const; + + void setDefaultMobileCPUAllocator(); + void unsetDefaultMobileCPUAllocator(); + bool allowFP16ReductionCPU() const; + void setAllowFP16ReductionCPU(bool); + + private: + void initCUDAIfNeeded(c10::DeviceType p) { + if (p == c10::DeviceType::CUDA) { + lazyInitCUDA(); + } + } + void initHIPIfNeeded(c10::DeviceType p) { + if (p == c10::DeviceType::HIP) { + lazyInitHIP(); + } + } + void initXPUIfNeeded(c10::DeviceType p) { + if (p == c10::DeviceType::XPU) { + lazyInitXPU(); + } + } + static bool checkCuBLASConfigDeterministic(); + c10::once_flag thc_init; + c10::once_flag thh_init; + c10::once_flag thx_init; + c10::once_flag th_mtia_init; + c10::once_flag thp_init; + bool enabled_cudnn = true; + bool deterministic_cudnn = false; + bool deterministic_mkldnn = false; + bool _deterministic_algorithms = false; + bool _deterministic_algorithms_warn_only = false; + bool _deterministic_fill_uninitialized_memory = true; + bool enabled_flashSDP = true; + bool enabled_mem_efficientSDP = true; + bool enabled_mathSDP = true; + bool enabled_cudnnSDP = true; + bool enabled_overrideable = true; + bool allow_fp16_bf16_reduction_mathSDP = false; +#ifdef USE_ROCM + bool benchmark_cudnn = true; +#else + bool benchmark_cudnn = false; +#endif + Float32MatmulPrecision float32_matmul_precision = + c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true + ? at::Float32MatmulPrecision::HIGH + : at::Float32MatmulPrecision::HIGHEST; + int benchmark_limit_cudnn = 10; + bool allow_tf32_cudnn = true; + bool allow_fp16_reduction_cublas = true; + bool allow_bf16_reduction_cublas = true; + bool enabled_mkldnn = true; + bool enabled_nnpack = true; + at::LinalgBackend linalg_preferred_backend = + c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true + ? at::LinalgBackend::Cusolver + : at::LinalgBackend::Default; + at::BlasBackend blas_preferred_backend = +#ifdef USE_ROCM + (c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") != false) +#else + (c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true) +#endif + ? 
at::BlasBackend::Cublaslt + : at::BlasBackend::Cublas; +#ifdef C10_MOBILE + bool release_original_weights = true; +#else + bool release_original_weights = false; +#endif + bool display_vmap_fallback_warnings_ = false; + std::optional quantized_engine = std::nullopt; + bool enable_sparse_tensor_invariant_checks = false; + bool allow_fp16_reduction_cpu = false; + + Allocator* prev_allocator_ptr_{nullptr}; +}; + +TORCH_API Context& globalContext(); + +inline void init() { + globalContext(); +} + +TORCH_API Allocator* getCPUAllocator(); + +inline DeprecatedTypeProperties& getDeprecatedTypeProperties( + Backend p, + ScalarType s) { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + p, s); +} + +inline DeprecatedTypeProperties& CPU(ScalarType s) { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + Backend::CPU, s); +} + +inline DeprecatedTypeProperties& CUDA(ScalarType s) { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + Backend::CUDA, s); +} + +inline DeprecatedTypeProperties& HIP(ScalarType s) { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + Backend::HIP, s); +} + +inline DeprecatedTypeProperties& MPS(ScalarType s) { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + Backend::MPS, s); +} + +inline bool hasCUDA() { + return globalContext().hasCUDA(); +} + +inline bool hasMTIA() { + return globalContext().hasMTIA(); +} + +inline bool hasHIP() { + return globalContext().hasHIP(); +} + +inline bool hasIPU() { + return globalContext().hasIPU(); +} + +inline bool hasXLA() { + return globalContext().hasXLA(); +} + +inline bool hasMPS() { + return globalContext().hasMPS(); +} + +inline bool hasMAIA() { + return globalContext().hasMAIA(); +} + +inline bool hasXPU() { + return globalContext().hasXPU(); +} + +// Despite its name, this function returns the number of *CUDA* GPUs. +inline size_t getNumGPUs() { + // WARNING: DO NOT ADD LOGIC TO HANDLE OTHER DEVICE TYPES TO THIS + // FUNCTION. If you are interested in interrogating the number of + // devices for a specific device type, add that function to the + // relevant library (e.g., similar to at::cuda::device_count()) + if (hasCUDA() && hasHIP()) { + throw std::runtime_error( + "Enabling both CUDA and HIP in ATen is not supported, as HIP masquerades " + "to be CUDA (e.g., when you say CUDA, on a HIP build of ATen, this actually " + "means HIP. Rebuild PyTorch with one or the other disabled."); + } else if (hasCUDA()) { + return detail::getCUDAHooks().getNumGPUs(); + } else if (hasHIP()) { + return detail::getHIPHooks().getNumGPUs(); + } else { + return 0; + } +} + +inline bool hasOpenMP() { + return globalContext().hasOpenMP(); +} + +inline bool hasMKL() { + return globalContext().hasMKL(); +} + +inline bool hasLAPACK() { + return globalContext().hasLAPACK(); +} + +inline bool hasMAGMA() { + return globalContext().hasMAGMA(); +} + +inline bool hasMKLDNN() { + return globalContext().hasMKLDNN(); +} + +inline void manual_seed(uint64_t seed) { + auto gen = globalContext().defaultGenerator(c10::DeviceType::CPU); + { + // See Note [Acquire lock when using random generators] + std::lock_guard lock(gen.mutex()); + gen.set_current_seed(seed); + } + // NB: Sometimes we build with CUDA, but we don't have any GPUs + // available. In that case, we must not seed CUDA; it will fail! 
+ const auto cuda_num_gpus = detail::getCUDAHooks().getNumGPUs(); + if (hasCUDA() && cuda_num_gpus > 0) { + for (const auto i : c10::irange(cuda_num_gpus)) { + auto cuda_gen = globalContext().defaultGenerator( + Device(at::kCUDA, static_cast(i))); + { + // See Note [Acquire lock when using random generators] + std::lock_guard lock(cuda_gen.mutex()); + cuda_gen.set_current_seed(seed); + } + } + } + + const auto xpu_num_gpus = detail::getXPUHooks().getNumGPUs(); + if (hasXPU() && xpu_num_gpus) { + for (const auto i : c10::irange(xpu_num_gpus)) { + auto xpu_gen = globalContext().defaultGenerator( + Device(at::kXPU, static_cast(i))); + { + // See Note [Acquire lock when using random generators] + std::lock_guard lock(xpu_gen.mutex()); + xpu_gen.set_current_seed(seed); + } + } + } + + if (hasMPS()) { + auto mps_gen = globalContext().defaultGenerator(c10::DeviceType::MPS); + // See Note [Acquire lock when using random generators] + std::lock_guard lock(mps_gen.mutex()); + mps_gen.set_current_seed(seed); + } +} + +// When the global flag `allow_tf32` is set to true, cuBLAS handles are +// automatically configured to use math mode CUBLAS_TF32_TENSOR_OP_MATH. +// For some operators, such as addmv, TF32 offers no performance improvement +// but causes precision loss. To help this case, this class implements +// a RAII guard that can be used to quickly disable TF32 within its scope. +// +// Usage: +// NoTF32Guard disable_tf32; +struct TORCH_API NoTF32Guard { + NoTF32Guard(); + ~NoTF32Guard(); + static bool should_disable_tf32(); + + private: + bool changed = false; +}; + +struct TORCH_API ROCmBackwardPassGuard { + ROCmBackwardPassGuard(); + ~ROCmBackwardPassGuard(); + static bool is_backward_pass(); +}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Device.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Device.h new file mode 100644 index 0000000000000000000000000000000000000000..6c515580363c9e9aab3ee322678fd0cb0283aec8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Device.h @@ -0,0 +1,2 @@ +#pragma once +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/DeviceAccelerator.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/DeviceAccelerator.h new file mode 100644 index 0000000000000000000000000000000000000000..7840911bd6b25be94c98ff2a636bcec3b8e21512 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/DeviceAccelerator.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +#include +#include + +// This file defines the top level Accelerator concept for PyTorch. +// A device is an accelerator per the definition here if: +// - It is mutually exclusive with all other accelerators +// - It performs asynchronous compute via a Stream/Event system +// - It provides a set of common APIs as defined by AcceleratorHooksInterface +// +// As of today, accelerator devices are (in no particular order): +// CUDA, MTIA, XPU, HIP, MPS, PrivateUse1 + +namespace at { + +// Ensures that only one accelerator is available (at +// compile time if possible) and return it. +// When checked is true, the returned optional always has a value. 
+TORCH_API std::optional getAccelerator(bool checked = false); + +TORCH_API bool isAccelerator(c10::DeviceType d); + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b98d648c684546c4bce946dc777cf62e3fc795b4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h @@ -0,0 +1,808 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ +#include // For CUDA_VERSION +#endif + +#ifdef TEMPLATE_SELECTIVE_BUILD +#include +#else +namespace at { +/** + * The method should_include_kernel_dtype() returns true/false + * based on whether the switching code for a specific dtype should be + * included based on build time constants generated from tracing model + * execution. This method will be implemented via code-generation and + * included in this file when code-gen is ready. + */ +inline constexpr bool should_include_kernel_dtype( + const char* /*kernel_tag_str*/, + at::ScalarType /*scalar_type*/ +) { + return true; +} +} // namespace at +#endif + +/** + * In the Facebook internal build (using BUCK), this macro is enabled by + * passing in -c pt.enable_record_kernel_dtype=1 when building the tracer + * binary. + */ +#if defined ENABLE_RECORD_KERNEL_FUNCTION_DTYPE +namespace at { +namespace detail { +TORCH_API void record_kernel_function_dtype(std::string name); +} +} // namespace at + +#define RECORD_KERNEL_FUNCTION_DTYPE(NAME, enum_type) \ + at::detail::record_kernel_function_dtype( \ + std::string(NAME) + "$" + toString(enum_type)); +#else +#define RECORD_KERNEL_FUNCTION_DTYPE(NAME, enum_type) +#endif + +#define AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type) \ + do { \ + if constexpr (!at::should_include_kernel_dtype( \ + at_dispatch_name, enum_type)) { \ + AT_ERROR( \ + "dtype '", \ + toString(enum_type), \ + "' not selected for kernel tag ", \ + at_dispatch_name); \ + } \ + } while (0) + +#define AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, HINT, ...) \ + case enum_type: { \ + AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \ + using HINT C10_UNUSED = c10::impl::ScalarTypeToCPPTypeT; \ + return __VA_ARGS__(); \ + } + +#define AT_DISPATCH_CASE(enum_type, ...) \ + AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__) + +#define AT_DISPATCH_CASE_QINT(enum_type, scalar_type, ...) \ + case enum_type: { \ + AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \ + using scalar_t = scalar_type; \ + using underlying_t C10_UNUSED = typename scalar_t::underlying; \ + const auto& SCALAR_TYPE C10_UNUSED = enum_type; \ + const auto& UNDERLYING_TYPE C10_UNUSED = toUnderlying(enum_type); \ + return __VA_ARGS__(); \ + } + +#define AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + enum_type, scalar_type, bitwidth, qmin, qmax, ...) 
\ + case enum_type: { \ + AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \ + using scalar_t = scalar_type; \ + using underlying_t C10_UNUSED = typename scalar_t::underlying; \ + const auto& SCALAR_TYPE C10_UNUSED = enum_type; \ + const auto& UNDERLYING_TYPE C10_UNUSED = toUnderlying(enum_type); \ + C10_UNUSED int bit_width = bitwidth; \ + C10_UNUSED int64_t quant_min = qmin; \ + C10_UNUSED int64_t quant_max = qmax; \ + return __VA_ARGS__(); \ + } + +namespace detail { + +inline at::ScalarType scalar_type(at::ScalarType s) { + return s; +} + +C10_DEPRECATED_MESSAGE( + "passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, " + "pass an at::ScalarType instead") +inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) { + return t.scalarType(); +} + +C10_DEPRECATED_MESSAGE( + "AT_DISPATCH_ALL_TYPES_AND_HALF is deprecated, " + "use AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, ...) instead") +inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF() {} + +C10_DEPRECATED_MESSAGE( + "AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX is deprecated, " + "use AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(at::ScalarType::Half, ...) " + "instead") +inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX() {} + +} // namespace detail + +// The AT_DISPATCH_* family of macros provides the ability to +// conveniently generate specializations of a kernel over all of the +// dtypes we care about in PyTorch. We call it "dispatch" because +// we are "dispatching" to the correct, dtype-specific kernel. +// +// A standard usage looks like: +// +// AT_DISPATCH_ALL_TYPES(self.scalar_type(), "op_name", [&] { +// // Your code here, with 'scalar_t' now defined to +// // be the dtype in question +// }); +// +// There are many variations of this macro, so it's important to +// understand exactly /which/ dtypes you want to get instantiated, as +// well as what the "default" set is. +// +// The default set of dtypes that are instantiated (e.g., by +// AT_DISPATCH_ALL_TYPES) are floating point types (float, double), +// and integral types (int32_t, int64_t, int16_t, int8_t, uint8_t), +// but NOT booleans (bool), half-precision floats (Half) or +// complex number (c10::complex, c10::complex). +// This "cut" is somewhat historical (the default types are the +// ones that TH historically supported), but it also reflects the +// fact that the non-default types are "poorly" behaved (booleans +// are NOT integers mod 2, half precision operations ~essentially +// don't exist on CPU, complex numbers are an experimental application). +// +// Here are the questions you should generally ask to decide which +// dispatch you want: +// +// 1. Is this an integral or floating point specific operation? +// (If so, you'll want one of the FLOATING or INTEGRAL macros.) +// +// 2. Should half be supported? (If you're on CPU, the answer is almost +// definitely no. If you do want support, use one of the AND_HALF +// macros) +// +// Much rarer situations: +// +// 3. Should bool be supported? (You often have to write your kernel +// differently if arithmetic operations are involved.) If so, +// Use AT_DISPATCH_ALL_TYPES_AND along with ScalarType::Bool +// +// 4. Should complex be supported? The answer is almost always no, +// unless you are working on "generic" code that should work on +// all dtypes. +// +// Parameters: +// ----------- +// +// 1. 
The NAME argument is a "tag" that is used to trace and then +// conditionally compile fragments of the case statements such +// that the kernel functions are specialized only for the dtypes +// that are needed. The NAME parameter *must* be a build time +// const char* (can't be std::string, etc...) +// +// Please ensure that the NAME is unique for every implementation +// or you run the risk of over-including code for the kernel +// functions. There is no risk of missing out on any code, so +// it's mostly a risk of a Type-2 error, and not a Type-1 error. +// +// Switch-like syntax: +// ------------------- +// There is also a switch-case like syntax which is useful if a kernel +// needs to be specialized for particular scalar types +// +// AT_DISPATCH_SWITCH(self.scalar_type(), "op_name", +// AT_DISPATCH_CASE_INTEGRAL_TYPES([&] { +// op_integral(iter); +// }) +// AT_DISPATCH_CASE_FLOATING_TYPES([&] { +// op_floating(iter); +// }) +// AT_DISPATCH_CASE(kBool, [&] { +// op_bool(iter); +// }) +// ); +// +// For each AT_DISPATCH_FOO macro, there is a corresponding +// AT_DISPATCH_CASE_FOO macro which can be used inside of an +// AT_DISPATCH_SWITCH block. + +// NB: the the_type variable is not used, but we have kept it for +// backwards compatibility. It's probably not used by anyone though; +// but we're just being safe (and it doesn't hurt.) Note we must +// use it to shut up warnings about unused store. + +#define AT_DISPATCH_SWITCH(TYPE, NAME, ...) \ + [&] { \ + const auto& the_type = TYPE; \ + constexpr const char* at_dispatch_name = NAME; \ + /* don't use TYPE again in case it is an expensive or side-effect op */ \ + at::ScalarType _st = ::detail::scalar_type(the_type); \ + RECORD_KERNEL_FUNCTION_DTYPE(at_dispatch_name, _st); \ + switch (_st) { \ + __VA_ARGS__ \ + default: \ + AT_ERROR( \ + '"', \ + at_dispatch_name, \ + "\" not implemented for '", \ + toString(_st), \ + "'"); \ + } \ + }() + +#define AT_DISPATCH_CASE_FLOATING_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_TYPES_AND_HALF(...) \ + AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES_AND_HALF(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_REDUCED_FLOATING_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) + +#define AT_DISPATCH_REDUCED_FLOATING_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_REDUCED_FLOATING_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_TYPES_AND(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_TYPES_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, ...) 
\ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES_AND2( \ + SCALARTYPE1, SCALARTYPE2, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_TYPES_AND2( \ + SCALARTYPE1, SCALARTYPE2, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, ...) \ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, ...) \ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_COMPLEX_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::ComplexDouble, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::ComplexFloat, __VA_ARGS__) + +#define AT_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_COMPLEX_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_COMPLEX_TYPES_AND(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_COMPLEX_TYPES_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_COMPLEX_TYPES_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(...) \ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE_COMPLEX_TYPES(__VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND1(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1( \ + SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND1( \ + SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND2( \ + SCALARTYPE1, SCALARTYPE2, ...) \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2( \ + SCALARTYPE1, SCALARTYPE2, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND2( \ + SCALARTYPE1, SCALARTYPE2, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, ...) 
\ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, ...) \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND5( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, SCALARTYPE5, ...) \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND5( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND5( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + ...) \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE6, __VA_ARGS__) + +#define AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_FLOATING_AND_COMPLEX_TYPES_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_INTEGRAL_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::Byte, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Char, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Int, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Long, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Short, __VA_ARGS__) + +#define AT_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_INTEGRAL_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_INTEGRAL_TYPES_AND(SCALARTYPE, ...) 
\ + AT_DISPATCH_CASE_INTEGRAL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_INTEGRAL_TYPES_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_INTEGRAL_TYPES_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES(...) \ + AT_DISPATCH_CASE_INTEGRAL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_QINT_TYPES(...) \ + AT_DISPATCH_CASE_QINT(at::kQInt8, at::qint8, __VA_ARGS__) \ + AT_DISPATCH_CASE_QINT(at::kQUInt8, at::quint8, __VA_ARGS__) \ + AT_DISPATCH_CASE_QINT(at::kQInt32, at::qint32, __VA_ARGS__) + +#define AT_DISPATCH_QINT_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_QINT_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_QINT_TYPES_AND(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_QINT_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_QINT_TYPES_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_QINT_TYPES_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_QINT_BYTE_TYPES(...) \ + AT_DISPATCH_CASE_QINT(at::kQInt8, at::qint8, __VA_ARGS__) \ + AT_DISPATCH_CASE_QINT(at::kQUInt8, at::quint8, __VA_ARGS__) + +#define AT_DISPATCH_QINT_BYTE_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_QINT_BYTE_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_QINT_AND_SUB_BYTE_TYPES(...) \ + AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + at::kQInt8, at::qint8, CHAR_BIT, SCHAR_MIN, SCHAR_MAX, __VA_ARGS__) \ + AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + at::kQUInt8, at::quint8, CHAR_BIT, 0, UCHAR_MAX, __VA_ARGS__) \ + AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + at::kQInt32, \ + at::qint32, \ + CHAR_BIT * sizeof(int), \ + INT_MIN, \ + INT_MAX, \ + __VA_ARGS__) \ + AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + at::kQUInt4x2, at::quint4x2, 4, 0, 15, __VA_ARGS__) \ + AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \ + at::kQUInt2x4, at::quint2x4, 2, 0, 3, __VA_ARGS__) + +#define AT_DISPATCH_QINT_AND_SUB_BYTE_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_QINT_AND_SUB_BYTE_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(...) \ + AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE_COMPLEX_TYPES(__VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, NAME, AT_DISPATCH_CASE_ALL_TYPES_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND(SCALARTYPE, ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(SCALARTYPE, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND(SCALARTYPE, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, ...) 
\ + AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND2( \ + SCALARTYPE1, SCALARTYPE2, ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2( \ + SCALARTYPE1, SCALARTYPE2, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND2( \ + SCALARTYPE1, SCALARTYPE2, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, ...) \ + AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND3( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND4( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND5( \ + SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, SCALARTYPE4, SCALARTYPE5, ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND5( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND5( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + ...) 
\ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE6, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND6( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND7( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE6, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE7, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND7( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND7( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND8( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + SCALARTYPE8, \ + ...) \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX(__VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE1, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE2, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE3, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE4, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE5, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE6, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE7, __VA_ARGS__) \ + AT_DISPATCH_CASE(SCALARTYPE8, __VA_ARGS__) + +#define AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND8( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + SCALARTYPE8, \ + TYPE, \ + NAME, \ + ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND8( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + SCALARTYPE8, \ + __VA_ARGS__)) + +#define AT_DISPATCH_CASE_BIT_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::Bits1x8, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Bits2x4, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Bits4x2, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Bits8, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Bits16, __VA_ARGS__) + +#define AT_DISPATCH_BIT_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_BIT_TYPES(__VA_ARGS__)) + +#define AT_DISPATCH_INDEX_TYPES(TYPE, NAME, ...) 
\ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_PRIVATE_CASE_TYPE_USING_HINT( \ + at::ScalarType::Int, index_t, __VA_ARGS__) \ + AT_PRIVATE_CASE_TYPE_USING_HINT( \ + at::ScalarType::Long, index_t, __VA_ARGS__)) + +// ---------------------------------------------------------------------------- +// DEPRECATED MACROS, DON'T USE THESE +// ---------------------------------------------------------------------------- + +#define AT_DISPATCH_ALL_TYPES_AND_HALF(TYPE, NAME, ...) \ + detail::deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF(); \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND(at::ScalarType::Half, __VA_ARGS__)) diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/EmptyTensor.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/EmptyTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..e34be30f960712a9a6306383f9ffcfbde29d32a2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/EmptyTensor.h @@ -0,0 +1,166 @@ +#pragma once +#include + +namespace at::detail { + +inline void check_size_nonnegative(ArrayRef size) { + for (const auto& x : size) { + TORCH_CHECK( + x >= 0, + "Trying to create tensor with negative dimension ", + x, + ": ", + size); + } +} + +inline void check_size_nonnegative(ArrayRef size) { + for (const auto& x : size) { + TORCH_CHECK( + x.expect_size(__FILE__, __LINE__), + "Trying to create tensor with negative dimension ", + x, + ": ", + size); + } +} + +TORCH_API size_t computeStorageNbytesContiguous( + IntArrayRef sizes, + size_t itemsize, + size_t storage_offset = 0); +TORCH_API SymInt computeStorageNbytesContiguous( + SymIntArrayRef sizes, + const SymInt& itemsize, + const SymInt& storage_offset = 0); +TORCH_API size_t computeStorageNbytes( + IntArrayRef sizes, + IntArrayRef strides, + size_t itemsize, + size_t storage_offset = 0); +TORCH_API SymInt computeStorageNbytes( + SymIntArrayRef sizes, + SymIntArrayRef strides, + const SymInt& itemsize, + const SymInt& storage_offset = 0); + +TORCH_API TensorBase empty_generic( + IntArrayRef size, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type, + std::optional memory_format_opt); + +TORCH_API TensorBase empty_generic_symint( + SymIntArrayRef size, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type, + std::optional memory_format_opt); + +TORCH_API TensorBase empty_strided_generic( + IntArrayRef size, + IntArrayRef stride, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type); + +TORCH_API TensorBase empty_strided_symint_generic( + SymIntArrayRef size, + SymIntArrayRef stride, + c10::Allocator* allocator, + c10::DispatchKeySet ks, + ScalarType scalar_type); + +TORCH_API TensorBase empty_cpu( + IntArrayRef size, + ScalarType dtype, + bool pin_memory = false, + std::optional memory_format_opt = std::nullopt); + +TORCH_API TensorBase empty_cpu( + IntArrayRef size, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt, + std::optional memory_format_opt); + +TORCH_API TensorBase empty_cpu(IntArrayRef size, const TensorOptions& options); + +TORCH_API TensorBase empty_strided_cpu( + IntArrayRef size, + IntArrayRef stride, + ScalarType dtype, + bool pin_memory = false); + +TORCH_API TensorBase empty_strided_cpu( + IntArrayRef size, + IntArrayRef stride, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt); + +TORCH_API TensorBase 
empty_strided_cpu( + IntArrayRef size, + IntArrayRef stride, + const TensorOptions& options); + +TORCH_API TensorBase empty_meta( + IntArrayRef size, + ScalarType dtype, + std::optional memory_format_opt = std::nullopt); + +TORCH_API TensorBase empty_meta( + IntArrayRef size, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt, + std::optional memory_format_opt); + +TORCH_API TensorBase empty_symint_meta( + SymIntArrayRef size, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt, + std::optional memory_format_opt); + +TORCH_API TensorBase empty_meta(IntArrayRef size, const TensorOptions& options); + +TORCH_API TensorBase +empty_strided_meta(IntArrayRef size, IntArrayRef stride, ScalarType dtype); + +TORCH_API TensorBase empty_strided_meta( + IntArrayRef size, + IntArrayRef stride, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt); + +TORCH_API TensorBase empty_strided_meta( + IntArrayRef size, + IntArrayRef stride, + const TensorOptions& options); + +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + ScalarType dtype); + +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt); + +TORCH_API TensorBase empty_strided_symint_meta( + SymIntArrayRef size, + SymIntArrayRef stride, + const TensorOptions& options); + +} // namespace at::detail diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/ExpandBase.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/ExpandBase.h new file mode 100644 index 0000000000000000000000000000000000000000..8db6be6a643c8cb60cab8487478f9a2f0c817d8b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/ExpandBase.h @@ -0,0 +1,30 @@ +#include + +// Broadcasting utilities for working with TensorBase +namespace at { +namespace internal { +TORCH_API TensorBase expand_slow_path(const TensorBase& self, IntArrayRef size); +} // namespace internal + +inline c10::MaybeOwned expand_size( + const TensorBase& self, + IntArrayRef size) { + if (size.equals(self.sizes())) { + return c10::MaybeOwned::borrowed(self); + } + return c10::MaybeOwned::owned( + at::internal::expand_slow_path(self, size)); +} +c10::MaybeOwned expand_size(TensorBase&& self, IntArrayRef size) = + delete; + +inline c10::MaybeOwned expand_inplace( + const TensorBase& tensor, + const TensorBase& to_expand) { + return expand_size(to_expand, tensor.sizes()); +} +c10::MaybeOwned expand_inplace( + const TensorBase& tensor, + TensorBase&& to_expand) = delete; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h new file mode 100644 index 0000000000000000000000000000000000000000..b648ef616284f971ab51e2bf9d8e7dd557237bb7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include + +namespace at::functorch { + +// NOTE [functorch TLS in pytorch/pytorch] +// +// functorch lives out-of-tree. However, it has some TLS that needs to be +// propagated. The solution for that is we store a pointer to the TLS +// inside pytorch/pytorch and extend FuncTorchTLSBase inside functorch to +// include whatever functorch needs. 
+// +// We need to store a pointer due to the indirection: +// inside functorch, we will create a subclass of FunctorchTLSBase called +// FuncTorchTLSImpl that actually contains metadata, like the DynamicLayerStack. +// FuncTorchTLSBase doesn't have any metadata because it hasn't been defined +// yet. +// +// Here in pytorch/pytorch, we will pass around FuncTorchTLSBase*, but inside +// functorch, we will assign a FuncTorchTLSImpl* to the FunctorchTLSBase*. +// We can't directly pass around FunctorchTLSBase (without a pointer) because +// FuncTorchTLSImpl does not fit inside a FuncTorchTLSBase by virtue of having +// more elements. +struct TORCH_API FuncTorchTLSBase { + virtual ~FuncTorchTLSBase() = default; + virtual std::unique_ptr deepcopy() const = 0; + + virtual int64_t checkSupportsSingleLevelAutogradFunction() const = 0; + virtual void checkSupportsCppAutogradFunction() const = 0; + virtual void checkSupportsInplaceRequiresGrad() const = 0; + virtual void checkSupportsRetainGrad() const = 0; +}; + +// returns deepcopy of the functorch tls +TORCH_API std::unique_ptr getCopyOfFuncTorchTLS(); + +// sets the functorch tls. always does a deep copy. +TORCH_API void setFuncTorchTLS( + const std::shared_ptr& state); + +// get a mutable reference to the functorch tls +TORCH_API std::unique_ptr& functorchTLSAccessor(); + +} // namespace at::functorch diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalStorageImpl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalStorageImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..3f80171196fbc7bdb00c79d725ba521f49455d3d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalStorageImpl.h @@ -0,0 +1,208 @@ +#pragma once + +#include + +#include + +namespace at::functionalization { + +// See Note [Functionalization Pass In Core] + +// ViewMeta is a class used by the functionalization pass to navigate between +// a base tensor and a view tensor. +// For example, if I call `b = a.view1(...)` +// the functionalization pass will generate and store a ViewMeta on b that looks +// like: +// +// ViewMeta( +// [](const Tensor& base, int64_t mutated_view_idx) { +// return base.view1(...); +// }, +// [](const at::Tensor& base, const at::Tensor& mutated_view, +// int64_t mutated_view_idx) -> at::Tensor { +// return at::functionalization::impl::view1_inverse(base, mutated_view, +// ...); +// } +// +// The forward_fn lambda describes how to replay view1 on a tensor. +// +// The reverse_fn lambda describes how, given a tensor that is already a view, +// how to get the corresponding base tensor. See Note [Functionalization Pass: +// View Inverses] for details. +struct ViewMeta { + ViewMeta( + std::function forward, + std::function reverse, + bool has_symbolic_inputs, + bool is_multi_output = false, + bool is_as_strided = false, + int64_t out_idx = 0) + : forward_fn(std::move(forward)), + reverse_fn(std::move(reverse)), + out_index(out_idx), + is_multi_output(is_multi_output), + is_as_strided(is_as_strided), + has_symbolic_inputs(has_symbolic_inputs) {} + + std::function forward_fn; + std::function reverse_fn; + // See Note [out_idx in ViewMeta] + int64_t out_index; + + // Tells us if this is a multi-output view + bool is_multi_output; + + bool is_as_strided; + + // Tells us if this view operation has any symbolic inputs + bool has_symbolic_inputs; + + // Returns a copy of the current ViewMeta, if out_idx matches the current + // out_index. 
Otherwise, returns a new ViewMeta with the same forward/reverse + // functions, but a new out index. + ViewMeta to_out_idx(int64_t out_idx); +}; + +// FunctionalStorageImpl is a subclass of StorageImpl used by the +// functionalization pass. It has no underlying data (similar to meta storage). +// It also knows how to reflect mutations to tensors in the absence of a valid +// data pointer. +// +// A storage represents the state shared by (potentially multiple) views of the +// same tensor. For example, in the following code: +// +// b = a.view1(...) +// c = b.view2(...) +// b.add_(1) +// --> storage.add_update(b, {view1_meta}) +// +// The call to add_(1) will result in a call to alias.add_update(b, +// {view1_meta}), queueing up the mutation from b onto the alias. Later, suppose +// c is used in an expression (e.g. you try to print c, or pass it to an +// operator). Doing so will involve "syncing" c. First we apply any pending +// updates to the alias, and then we regenerate c by replaying its views off of +// the updated alias. E.g: +// +// print(str(c)) +// --> c.sync_() +// --> alias.apply_updates() // after this, the alias will be updated to +// reflect the mutation to b +struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl { + public: + struct Update { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::Tensor new_val; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector view_metas; + }; + + explicit FunctionalStorageImpl(const Tensor& value); + + void add_update( + const Tensor& updated_val, + const std::vector& view_metas); + bool apply_updates(); + const Tensor& base() { + return base_; + } + size_t generation() const { + return generation_; + } + void freeze() { + frozen_ = true; + } + + c10::SymInt get_storage_size(bool before) { + if (before) { + return original_storage_size_; + } else { + return curr_storage_size_; + } + } + + ~FunctionalStorageImpl() override = default; + + void mark_mutation() { + mutation_counter_++; + } + void mark_mutation_during_no_grad_or_inference_mode() { + mutation_counter_during_no_grad_or_inference_mode_++; + } + void mark_mutation_hidden_from_autograd() { + mutation_counter_hidden_from_autograd_++; + } + + bool are_all_mutations_under_no_grad_or_inference_mode() const { + auto non_autograd_mutations = + mutation_counter_during_no_grad_or_inference_mode_ + + mutation_counter_hidden_from_autograd_; + // The <= is because both counters will technically be incremented, if we + // perform e.g. a triton kernel mutation under no_grad + return mutation_counter_ <= non_autograd_mutations; + } + + bool are_all_mutations_hidden_from_autograd() const { + // mutations under no_grad / inference_mode are technically not hidden from + // autograd - they change the version counter + return mutation_counter_ <= mutation_counter_hidden_from_autograd_; + } + + void mark_inductor_storage_resize(c10::SymInt new_size) { + inductor_storage_resized_ = true; + curr_storage_size_ = std::move(new_size); + } + + bool was_inductor_storage_resized() { + return inductor_storage_resized_; + } + + private: + // NB: base_ should always point to a tensor BELOW the current + // functionalization layer. This is mainly to avoid reference cycles. e.g. + // given `b = a.view(...)` Both a.storage_ and b.storage_ are a + // FunctionStorageImpl containing an Walualias, with contains a Tensor + // `base_`. 
In this case (where a and b are FunctionalTensorWrapper's), base_ + // should point not to a, but to a's unwrapped value, a.value_` See Note + // [Functionalization: Walualias Removal] for a diagram that shows this + // visually. + at::Tensor base_; + std::vector updates_; + // generation_ gets incremented every time a mutation is queued onto the + // alias. It is used to determine if a given tensor is "up to date", or if it + // needs to be regenerated from the alias. + size_t generation_ = 0; + // If frozen, no more mutations are allowed on this storage. Once frozen, a + // storage cannot be unfrozen. + bool frozen_ = false; + + // These mutation counters are bumped on the storage + // whenever a FunctionalTensorWrapper experiences a mutation. + // When the mutation is under no_grad, or comes from a triton kernel, we also + // bump the corresponding during_no_grad or hidden_from_autograd counters. Why + // do we need to detect these two situations separately from "normal" input + // mutations? (1) "normal" input mutations can mutate autograd metadata like + // .grad_fn, + // in which case they need to be replayed outside of the compiled graph + // (2) "no_grad" input mutations are generally safe to keep in the graph (and + // compile), + // but they bump the tensor's VC, so we need to mark_dirty() on the inputs + // in torch.compile + // (3) mutations that are fully hidden from autograd (e.g. from a triton + // kernel) + // do not mutate any autograd state, and be fully kept in the graph + // When we detect that an input was mutated, we need to be able to tell if: + // (1) all of the mutations were from triton kernels + // (2) all of the mutations were under no_grad + uint64_t mutation_counter_during_no_grad_or_inference_mode_ = 0; + uint64_t mutation_counter_ = 0; + uint64_t mutation_counter_hidden_from_autograd_ = 0; + + // Used to tell if: + // (1) There were any storage resizes on a graph input + // (2) The original/curr storage size tell us if these resizes result in a nop + bool inductor_storage_resized_ = false; + c10::SymInt original_storage_size_; + c10::SymInt curr_storage_size_; +}; + +} // namespace at::functionalization diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalTensorWrapper.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalTensorWrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..ed5daf90e5f44ba9256efa81637a2d370962f4b6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/FunctionalTensorWrapper.h @@ -0,0 +1,454 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace at { + +// Note [Functionalization Pass In Core] +// The Functionalization pass is used to remove aliasing from a pytorch program. +// +// This is useful for backends that don't support aliasing, like XLA and Vulkan. +// It's also necessary in order to remove mutation from a program, which is +// needed in Functorch. +// +// Consider this program: +// a = torch.ones(...) +// b = a.view(...) +// b.add_(1) +// +// In this program, b is meant to alias with a due to the use of view(). At the +// end of the program, both a and b are full of 2's. However, backends that +// don't support aliasing aren't able to correctly implement the view() +// operator. Instead, they can opt into the Functionalization pass, which will +// sit between the user and the backend, and provide the necessary aliasing +// logic. 
+// +// The functionalization pass will turn the above program into a slightly +// different program that has the same semantics, transparently to the user, +// that backends like XLA/Vulkan are able to implement a = torch.ones(...) b = +// a.view_copy(...) # view() replaced with view_copy(). Backends like +// XLA/Vulkan can implement this! b.add_(1) a.add_(1) # Our functionalization +// pass machinery knows that a and b are aliased - it applies b's mutation to a +// too. +// +// So, how does the functionalization pass keep track of which tensors are +// aliased? The pass works by wrapping EVERY tensor in the program inside of a +// FunctionalTensorWrapper, which knows about its alias'd tensors. +// +// See Note [Functionalization: Alias Removal] for details on the aliasing +// machinery. See Note [Functionalization: Mutation Removal] for details on +// mutation removal. +struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl { + explicit FunctionalTensorWrapper(const Tensor& value); + // Additional constructor to create a FunctionalTensorWrapper directly from an + // underlying tensor that was created from a view. For example, the code b = + // a.view1() will generate a constructor call to FunctionalTensorWrapper(b, a, + // view1_meta) + explicit FunctionalTensorWrapper( + const Tensor& view_value, + const FunctionalTensorWrapper* base, + const functionalization::ViewMeta& meta); + + // Get the underlying, actual tensor, that doesn't know anything about + // functionalization. + const Tensor& value() const { + return value_; + }; + // The concept of "level" is only ever important to functorch; it's exposed + // here as more of a hook for functorch to use. + int64_t level() const { + return level_; + }; + void set_level(int64_t level) { + level_ = level; + } + bool has_metadata_mutation() const { + return has_metadata_mutation_; + }; + + void mark_mutation() { + functional_storage_impl()->mark_mutation(); + } + // Denotes a mutation that's hidden from autograd, + // e.g. for the purposes of passing a tensor to a triton kernel + void mark_mutation_hidden_from_autograd() { + functional_storage_impl()->mark_mutation_hidden_from_autograd(); + } + void mark_mutation_during_no_grad_or_inference_mode() { + functional_storage_impl()->mark_mutation_during_no_grad_or_inference_mode(); + } + // Are all the mutations happening to the tensor hidden from autograd + bool are_all_mutations_hidden_from_autograd() const { + return functional_storage_impl()->are_all_mutations_hidden_from_autograd(); + } + // Did all mutations happen under no_grad or inference_mode + // (We also need to ignore mutations fully hidden from autograd here) + bool are_all_mutations_under_no_grad_or_inference_mode() const { + return functional_storage_impl() + ->are_all_mutations_under_no_grad_or_inference_mode(); + } + + void maybe_mark_symbolic(const functionalization::ViewMeta& meta) { + is_symbolic_ = is_symbolic_ | meta.has_symbolic_inputs; + } + + bool is_symbolic() const { + return is_symbolic_; + } + + // Runs the forward_fn of every ViewMeta collected in the current instance + // to some other base. + Tensor apply_view_metas(const Tensor& base); + + // Sync's the underlying tensor with its alias, if it's out of date. This + // involves two steps: 1) Apply any pending updates/mutations to the alias 2) + // Replay the views (if any) to regenerate the current tensor off of the + // updated alias. + void sync_(); + // Performs step (1) of the sync. 
This is its own public API because it's + // needed by view_inplace ops like transpose_. See Note [Functionalization + // Pass - Inplace View Ops] + void regenerate_from_base(); + // Performs step (2) of the sync. This is its own public API because it's + // needed by functorch. functorch wants to make sure that all input tensors to + // a functionalized program have been properly synced so it can properly + // propagate mutations to inputs. It can't just call sync_(), because the + // FunctionalTensorWrapper will look like it has no aliases and sync_ will be + // a noop. We use the reference count on storage_ to determine if the wrapper + // is aliased, and by the time functorch is ready to propagate updates to + // inputs, any intermediate views of the input created by the program will + // have been deallocated. This function also returns whether or not the base + // actually had any updates to apply. + bool apply_updates(); + // Takes the current state of value_ and snapshots it, sending it as a pending + // update to the alias. + void commit_update(); + // When any tensor is mutated, the tensor increments its alias's "generation". + // Separately, each tensor maintains its own "generation" counter, which is + // used to determine if it's up-to-date with its alias. The act of syncing a + // tensor will set a tensor's generation equal to its alias's generation. + bool is_up_to_date() const; + // Freezes the storage of this tensor, preventing subsequent mutations + void freeze_storage() const; + // Every FunctionalTensorWrapper contains a vector objects + // describing the series of view ops that ran to generate the current tensor + // from the base tensor. This method is used by inplace-view ops like + // transpose_. It appends a ViewMeta to the existing stack, and refreshes the + // tensor by replaying the views off of the alias. + void mutate_view_meta(const at::functionalization::ViewMeta& meta); + + // Custom implementation of self.set_(src) + void set__impl(const FunctionalTensorWrapper* other); + + // Custom implementation of resize_storage_bytes_(self, new_size) + void storage_resize_(const c10::SymInt& new_size); + + // Returns whether the current tensor's data was ever mutated + bool has_data_mutation(); + // + // Returns whether the current FunctionalTensorWrapper + // experienced a set_() call. + bool was_storage_changed() { + return was_storage_changed_; + } + + void set_storage_changed() { + was_storage_changed_ = true; + } + + // A FunctionalTensor is considered a base if its not a view of another + // tensor. + bool isBaseTensor() const { + return view_metas_.empty(); + } + + c10::SymInt get_storage_size(bool before) { + return functional_storage_impl()->get_storage_size(before); + } + + // Returns whether the FunctionalTensor experienced an + // untyped_storage().resize_() call + bool was_inductor_storage_resized() { + return functional_storage_impl()->was_inductor_storage_resized(); + } + + // The functionalization pass can be used to remove mutations. + // It does so by replacing any mutation op with it's corresponding + // out-of-place op, followed by a call to replace_(). e.g: + // + // a.add_(1) + // + // will turn into: + // + // tmp = a.add(1) + // a.replace_(tmp) + // + // replace_() swaps out the wrapped tensor, value_, with tmp. 
+ void replace_(const Tensor& other, bool from_lazy_regenerate = false); + + bool is_multi_output_view() { + return is_multi_output_view_; + } + + // See Note[resize_() in functionalization pass] + void maybe_replace_storage(const Tensor& other); + + // Replaces the storage with a new functional storage, + // and clears the view_metas_ stack. + // WARNING: Calling this function will sever the aliasing relationship between + // the current FunctionalTensorWrapper and any of its outstanding aliases. + // Please only call if you know what you're doing. + void _unsafe_reset_storage(); + + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override; + + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override; + + ~FunctionalTensorWrapper() override = default; + + // FunctionalTensorWrapper overrides all custom size/stride function, + // so that if the inner tensor has a custom implementation + // we make sure to call that implementation. + at::IntArrayRef sizes_custom() const override; + at::IntArrayRef strides_custom() const override; + int64_t dim_custom() const override; + int64_t numel_custom() const override; + bool is_contiguous_custom(at::MemoryFormat memory_format) const override; + c10::SymIntArrayRef sym_sizes_custom() const override; + c10::SymInt sym_size_custom(int64_t d) const override; + c10::SymIntArrayRef sym_strides_custom() const override; + c10::SymInt sym_storage_offset_custom() const override; + c10::Device device_custom() const override; + c10::Layout layout_impl() const override; + + private: + const char* tensorimpl_type_name() const override; + void set_constructor_metadata(); + functionalization::FunctionalStorageImpl* functional_storage_impl() const; + + // This is used to re-implement shallow_copy_and_detach for + // FunctionalTensorWrapper. The implementation is identical, but we just need + // to return a subclass instead of a plain TensorImpl. + // TODO: maybe it's possible to arrange for that to happen automatically + // without an override here? + template + c10::intrusive_ptr shallow_copy_and_detach_core( + VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const; + + void shallow_copy_from(const c10::intrusive_ptr& impl) override; + void copy_tensor_metadata_and_refresh( + const FunctionalTensorWrapper* src_impl, + FunctionalTensorWrapper* dest_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const; + + // Note that value is not taken by reference: internally, the wrapper will + // change the value tensor that it points to over time. + Tensor value_; + int64_t level_{}; + // These two counters are used for identifying + // whether all the mutations on a given tensor are hidden from autograd or + // not. If we have an input mutation that is hidden from autograd, then once + // we convert the input mutation to a copy_() we know it will be safe to hide + // the copy_() from autograd as well. + bool has_metadata_mutation_ = false; + bool is_multi_output_view_ = false; + // Did the tensor experience a set_() call. + bool was_storage_changed_ = false; + // Did the tensor experience any view operation with symbolic int. 
+ bool is_symbolic_ = false; + + size_t generation_ = 0; + std::vector view_metas_; + + protected: + static void copy_tensor_metadata( + const FunctionalTensorWrapper* src_impl, + FunctionalTensorWrapper* dest_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change); +}; + +// Utility functions for the functionalization pass. + +namespace functionalization { +namespace impl { + +TORCH_API inline FunctionalTensorWrapper* unsafeGetFunctionalWrapper( + const Tensor& tensor) { + auto functional_impl = + static_cast(tensor.unsafeGetTensorImpl()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(functional_impl != nullptr); + return functional_impl; +} + +TORCH_API bool isBaseTensor(const at::Tensor& tensor); + +TORCH_API bool isFunctionalTensor(const at::Tensor& tensor); +TORCH_API bool isFunctionalTensor(const std::optional& t); +TORCH_API bool isFunctionalTensor( + const c10::List>& t_list); +TORCH_API bool isFunctionalTensor(ITensorListRef list); + +TORCH_API Tensor to_functional_tensor(const Tensor& tensor); +TORCH_API std::optional to_functional_tensor( + const std::optional& tensor); +TORCH_API c10::List> to_functional_tensor( + const c10::List>& t_list); +TORCH_API std::vector to_functional_tensor(ITensorListRef t_list); + +TORCH_API void freeze_functional_tensor(const Tensor& tensor); + +TORCH_API Tensor +from_functional_tensor(const Tensor& tensor, bool assert_functional = true); +TORCH_API std::optional from_functional_tensor( + const std::optional& t, + bool assert_functional = true); +TORCH_API c10::List> from_functional_tensor( + const c10::List>& t_list); +TORCH_API std::vector from_functional_tensor(ITensorListRef t_list); + +TORCH_API void sync(const at::Tensor& t); +TORCH_API void sync(const std::optional& t); +TORCH_API void sync(const c10::List>& t_list); +TORCH_API void sync(ITensorListRef t_list); + +TORCH_API void replace_(const Tensor& functional_tensor, const Tensor& other); +TORCH_API void replace_( + const ITensorListRef functional_tensor, + ITensorListRef other); + +TORCH_API void commit_update(const Tensor& functional_tensor); +TORCH_API void commit_update(ITensorListRef functional_tensor); + +TORCH_API void unsafe_reset_storage(const Tensor& functional_tensor); + +TORCH_API void mark_mutation_hidden_from_autograd( + const Tensor& functional_tensor); + +TORCH_API bool are_all_mutations_hidden_from_autograd( + const Tensor& functional_tensor); + +TORCH_API bool are_all_mutations_under_no_grad_or_inference_mode( + const Tensor& functional_tensor); + +// These two methods are XLA-specific logic and are no-ops +// for the normal functionalization flow. 
+TORCH_API void propagate_xla_data( + const Tensor& functional_tensor, + const Tensor& other); +TORCH_API void propagate_xla_data( + const ITensorListRef functional_tensor, + ITensorListRef other); + +TORCH_API void propagate_xla_data_direct( + const Tensor& tensor, + const Tensor& other); +TORCH_API void propagate_xla_data_direct( + const ITensorListRef tensor, + ITensorListRef other); + +Tensor create_functional_tensor_with_view_meta( + const Tensor& view_to_wrap, + const Tensor& base, + functionalization::ViewMeta meta, + int64_t out_idx = 0); +std::vector create_functional_tensor_with_view_meta( + ITensorListRef view_to_wrap, + const Tensor& base, + const functionalization::ViewMeta& meta); + +void mutate_view_meta( + const Tensor& self, + const functionalization::ViewMeta& meta); + +void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out); +void set_sizes_strides_offset( + const std::vector& outs, + const std::vector& meta_outs); + +// ~~~~~ TLS used in functionalization ~~~~~ + +TORCH_API bool getFunctionalizationReapplyViewsTLS(); +TORCH_API void setFunctionalizationReapplyViewsTLS(bool reapply_views); + +class TORCH_API FunctionalizationReapplyViewsGuard { + public: + FunctionalizationReapplyViewsGuard(bool reapply_views) + : prev_(getFunctionalizationReapplyViewsTLS()) { + setFunctionalizationReapplyViewsTLS(reapply_views); + } + + ~FunctionalizationReapplyViewsGuard() { + setFunctionalizationReapplyViewsTLS(prev_); + } + + FunctionalizationReapplyViewsGuard( + const FunctionalizationReapplyViewsGuard&) = delete; + FunctionalizationReapplyViewsGuard operator=( + const FunctionalizationReapplyViewsGuard&) = delete; + FunctionalizationReapplyViewsGuard(FunctionalizationReapplyViewsGuard&&) = + delete; + FunctionalizationReapplyViewsGuard operator=( + FunctionalizationReapplyViewsGuard&&) = delete; + + private: + bool prev_; +}; + +} // namespace impl + +// Helper function to call an out-of-place composite aten kernel that may use +// mutations / views internally, and functionalize them. +TORCH_API void functionalize_op_helper( + const c10::OperatorHandle& op, + torch::jit::Stack* stack); + +template +struct _functionalize_aten_op final {}; + +template +struct _functionalize_aten_op final { + static ReturnType call( + typename c10::maybe_keep_symint::type... args) { + using FuncType = ReturnType( + typename c10::maybe_keep_symint::type...); + auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow( + (const char*)Op::name, (const char*)Op::overload_name) + .typed(); + + return c10::impl::BoxedKernelWrapper::call( + c10::BoxedKernel::makeFromFunction(), + op, + // BoxedKernelWrapper knows to ignore this keyset argument, + // because functionalize_op_helper doesn't take in a DispatchKeySet + c10::DispatchKeySet(), + args...); + } +}; + +template +using functionalize_aten_op = + _functionalize_aten_op; + +template +using functionalize_aten_op_symint = + _functionalize_aten_op; + +} // namespace functionalization +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/InferSize.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/InferSize.h new file mode 100644 index 0000000000000000000000000000000000000000..53d1b395453ea208b355750ca6839b9af8737e40 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/InferSize.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +// Infers the size of a dim with size -1, if it exists. 
Also checks that new +// shape is compatible with the number of elements. +// +// templated to handle std::vector and DimVector use cases, see +// below +// +template +inline void infer_size_impl( + InputArrayRef shape, + NumelType numel, + ResultVec& res) { + NumelType newsize = 1; + // N.B. this is an index, not a sym dim! + std::optional infer_dim; + for (int64_t dim = 0, ndim = shape.size(); dim != ndim; dim++) { + if (shape[dim] == -1) { + if (infer_dim) { + throw std::runtime_error("only one dimension can be inferred"); + } + infer_dim = dim; + } else if (shape[dim] >= 0) { + newsize *= shape[dim]; + } else { + AT_ERROR("invalid shape dimension ", shape[dim]); + } + } + + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(numel, newsize)) || + (infer_dim && newsize > 0 && numel % newsize == 0)) { + if (infer_dim) { + // We have a degree of freedom here to select the dimension size; follow + // NumPy semantics and just bail. However, a nice error message is needed + // because users often use `view` as a way to flatten & unflatten + // dimensions and will otherwise be confused why + // empty_tensor.view( 0, 0) + // works yet + // empty_tensor.view(-1, 0) + // doesn't. + TORCH_CHECK( + newsize != 0, + "cannot reshape tensor of 0 elements into shape ", + shape, + " because the unspecified dimension size -1 can be any " + "value and is ambiguous"); + res[*infer_dim] = numel / newsize; + } + return; + } + + std::ostringstream ss; + ss << "shape '" << shape << "' is invalid for input of size " << numel; + throw std::runtime_error(ss.str()); +} + +inline std::vector infer_size(IntArrayRef shape, int64_t numel) { + auto res = shape.vec(); + infer_size_impl(shape, numel, res); + return res; +} + +inline at::DimVector infer_size_dv(IntArrayRef shape, int64_t numel) { + auto res = at::DimVector(shape); + infer_size_impl(shape, numel, res); + return res; +} + +inline at::SymDimVector infer_size_dv( + c10::SymIntArrayRef shape, + c10::SymInt numel) { + auto res = at::SymDimVector(shape); + infer_size_impl( + shape, std::move(numel), res); + return res; +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/InitialTensorOptions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/InitialTensorOptions.h new file mode 100644 index 0000000000000000000000000000000000000000..d6914552eb0df70b18077c6ef10a55149790b5d6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/InitialTensorOptions.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace at { + +// Represents the initial TensorOptions, before the "defaults" are ever changed. +// This is designed to be used in library code, where the explicit devices, +// dtypes, etc. are known. NOTE: this is not a stable API. 
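+// A rough usage sketch (hypothetical call site): library code typically
+// starts from these options and overrides fields explicitly, e.g.
+// at::empty({0}, initialTensorOptions().dtype(at::kLong));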
+inline TensorOptions initialTensorOptions() { + return TensorOptions(kCPU).dtype(kFloat).layout(kStrided).requires_grad( + false); +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Layout.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Layout.h new file mode 100644 index 0000000000000000000000000000000000000000..ea71e2b469bcf02365c78ebfba1b1d0362b6e531 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Layout.h @@ -0,0 +1,2 @@ +#pragma once +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedFallback.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedFallback.h new file mode 100644 index 0000000000000000000000000000000000000000..beef24a6ed9c5b8373a0db5bcc16f268b8c18726 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedFallback.h @@ -0,0 +1,25 @@ +#pragma once +#include +#include +#include + +namespace at { + +// If an operator doesn't have a batching rule implemented then we fallback +// to this implementation. The fallback only works on out-of-place operators +// that return only tensors with new memory. (e.g., no in-place operators, no +// view operations). +// +// The fallback effectively takes all of the BatchedTensors in `stack`, slices +// them, and runs `op` on all of the corresponding slices to produce slices +// of the outputs. The output slices then get `torch.stack`ed to create the +// final returns. +// +// The performance of the fallback is not very good because it introduces an +// extra copy from stacking the sliced outputs. Because of this, we prefer to +// write batching rules for operators whenever possible. +void batchedTensorForLoopFallback( + const c10::OperatorHandle& op, + torch::jit::Stack* stack); + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedTensorImpl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..098fbf9d6292fdeb0fff5e3786b471b742540723 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyBatchedTensorImpl.h @@ -0,0 +1,160 @@ +#pragma once + +#include + +#include +#include +#include + +namespace at { + +// We assume this in a few other places in the codebase, +// but there isn't a centralized definition. +constexpr int64_t kVmapMaxTensorDims = 64; + +// The valid vmap levels range from [0, 64). This effectively means that we +// support a maximum of 64 nested vmaps. +constexpr int64_t kVmapNumLevels = 64; + +// Store this number of elements of BatchDims on the stack. Most people will +// probably use <= 5 nested vmaps, but adjust this number as necessary. +constexpr int64_t kBatchDimsStackSize = 5; + +// a BatchDim represents a "private" dimension on a Tensor created inside of +// vmap. It is a (level, dim) tuple, with the `dim` indicating which dimension +// is being vmap'ed over and the `level` being an identifier for which vmap +// said dimension was created inside. The `dim` corresponds to a "physical +// dim" - it is a dimension index on the underlying physical tensor that is +// being vmapped over. 
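+// For example (illustrative values): vmap'ing at level 1 over physical
+// dimension 0 of the underlying tensor corresponds to
+// BatchDim(/*level=*/1, /*dim=*/0).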
+struct BatchDim { + BatchDim(int64_t level, int64_t dim) : dim_(dim), level_(level) {} + int64_t dim() const { + return dim_; + } + int64_t level() const { + return level_; + } + + private: + int64_t dim_; + int64_t level_; +}; + +using BatchDims = SmallVector; +using BatchDimsRef = ArrayRef; + +// A BatchedTensorImpl holds an underlying Tensor and a list of BatchDim +// NB: We use the term "BatchedTensor" to mean a Tensor that is backed with a +// BatchedTensorImpl. +// +// The batch dimensions are treated as being "private"; they are not +// user-visible. For example, in the following Tensor, +// bt = BatchedTensorImpl(ones(2, 3, 5, 7), [(lvl=1, dim=0), (lvl=2, dim=1)]) +// dimensions 0 and 1 are batch dimensions. +// +// bt.sizes() returns (5, 7); bt.sum(0) performs a reduction over the (public) +// dim 0, which is equivalent to dim 3 in the underlying ones(2, 3, 5, 7) +// tensor. +struct TORCH_API BatchedTensorImpl : public c10::TensorImpl { + explicit BatchedTensorImpl(Tensor value, BatchDims bdims); + + // Returns a reference to BatchDims that represent which dimensions of this + // tensor are private. + BatchDimsRef bdims() const { + return bdims_; + } + + // BatchedTensorImpl wraps a Tensor + const Tensor& value() const { + return value_; + }; + + // Given a public dimension index, return the dimension index in the + // underlying value() tensor. For example, if we have + // bt = BatchedTensorImpl(ones(2, 3, 5, 7), [(lvl=1, dim=0), (lvl=2, + // dim=2)]) + // bt.actualDim(0) -> 1 + // bt.actualDim(1) -> 3 + // bt.actualDim(2) -> Error + int64_t actualDim(int64_t dim, bool wrap_dim = true) const; + + // We have to override this because we opted into CustomStrides + IntArrayRef strides_custom() const override; + // Override a bunch of methods inherited from TensorImpl to return error + // messages. + bool is_contiguous_custom(at::MemoryFormat memory_format) const override; + void set_size(int64_t dim, int64_t new_size) override; + void set_stride(int64_t dim, int64_t new_stride) override; + void set_storage_offset(int64_t storage_offset) override; +#ifdef DEBUG + bool has_storage() const override; +#endif + + private: + // see NOTE: [BatchedTensorImpl levels invariant] + void checkInvariants() const; + const char* tensorimpl_type_name() const override; + + Tensor value_; + + // Note: [BatchedTensorImpl levels invariant] + // There is an invariant that the BatchDims must be stored in increasing + // `level` order. That is, for i < j, bdims_[i].level must be less than + // bdims_[j].level. + BatchDims bdims_; +}; + +// NB: We use the term "BatchedTensor" to mean a Tensor that is backed with a +// BatchedTensorImpl. +inline bool isBatchedTensor(const Tensor& tensor) { + return tensor.unsafeGetTensorImpl()->key_set().has(DispatchKey::Batched); +} + +// It is unsafe to call this on a Tensor that is not backed by a +// BatchedTensorImpl. Please use `maybeGetBatchedImpl` whenever possible. +inline BatchedTensorImpl* unsafeGetBatchedImpl(const Tensor& tensor) { + return static_cast(tensor.unsafeGetTensorImpl()); +} + +inline BatchedTensorImpl* maybeGetBatchedImpl(const Tensor& tensor) { + if (!isBatchedTensor(tensor)) { + return nullptr; + } + return unsafeGetBatchedImpl(tensor); +} + +// Returns a bitset. If bit i is set, then that means dim i is a batchdim. 
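+// For example (illustrative): bdims = [(lvl=1, dim=0), (lvl=2, dim=2)] yields
+// a bitset with bits 0 and 2 set.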
+inline std::bitset createBatchDimBitset( + BatchDimsRef bdims) { + std::bitset is_bdim; + for (const auto& bdim : bdims) { + is_bdim.set(bdim.dim()); + } + return is_bdim; +} + +// Creates a bitset for all of the levels present in `bdims` +inline std::bitset createVmapLevelsBitset(BatchDimsRef bdims) { + std::bitset result; + for (const auto& bdim : bdims) { + result.set(bdim.level()); + } + return result; +} + +inline std::ostream& operator<<(std::ostream& out, const BatchDim& bdim) { + out << "(lvl=" << bdim.level() << ", dim=" << bdim.dim() << ")"; + return out; +} + +// Use this to construct a BatchedTensor from a regular Tensor +TORCH_API Tensor makeBatched(const Tensor& tensor, BatchDims bdims); + +// Adds a batch dim to `tensor`, returning a BatchedTensor +TORCH_API Tensor addBatchDim(const Tensor& tensor, int64_t level, int64_t dim); + +// Checks if an inplace operation on self and other is "vmap compatible". +// See NOTE: [vmap-incompatible in-place operations] for the definition of this. +TORCH_API bool inplaceIsVmapCompatible(const Tensor& self, const Tensor& other); + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h new file mode 100644 index 0000000000000000000000000000000000000000..a1231088c2b31e3fd8c40ed17ccdd41f36035367 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +namespace at::impl { + +// VmapMode contains a thread local count of how many nested vmaps +// we are currently inside. That number is known as the `vmap level`. +// VmapMode is used in the implementation of the Python `torch.vmap` API. +// +// NOTE: this is NOT the c++ api for torch.vmap. That doesn't exist yet. + +struct TORCH_API VmapMode { + // Returns the vmap level, aka the count of how many nested vmaps we're in. + static int64_t current_vmap_level(); + + // Increment the count of nested vmaps. If this causes the vmap level to be + // greater than 0, then it enables DispatchKey::VmapMode on all tensors. + static int64_t increment_nesting(); + + // Decrements the count of nested vmaps. If this causes the vmap level to be + // equal to 0, then it disables DispatchKey::VmapMode on all tensors. + static int64_t decrement_nesting(); +}; + +} // namespace at::impl diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h new file mode 100644 index 0000000000000000000000000000000000000000..97729b3254e7458fa76fed74469bf77ed796fe7d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h @@ -0,0 +1,183 @@ +#pragma once + +#include +#include + +namespace at { + +// This file contains abstractions used for transforming *logical* vmap +// arguments into *physical* arguments. (Keep reading for definitions of these +// terms). + +// NOTE: [Logical vs physical args] +// Consider the following vmap. +// vmap(vmap(func, in_dims=(2,)), in_dims=(0,))(torch.ones(2, 3, 4)) +// This would produce a BatchedTensor wrapping a Tensor of size [2, 3, 4], +// with batch dims 0 and 2: +// BatchedTensor(ones(2, 3, 4), bdims=[(lvl=1,dim=0),(lvl=2,dim=2)]) +// +// We say the *logical* view of the tensor has size [3] -- tensors inside +// `func` appear to have size [3]. +// However, the *physical* underlying tensor (the one passed to vmap) has size +// [2, 3, 4]. 
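+//
+// Informally (a hypothetical probe, not something `func` would normally do):
+// inside `func`, tensor.sizes() reports [3], while
+// maybeGetBatchedImpl(tensor)->value().sizes() is still [2, 3, 4].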
+// +// This notion of logical vs physical also extends to non-tensor arguments. +// Consider the previous tensor; let's assume the user called +// `torch.sum(tensor, dim=0)` inside of `func`. Then the logical +// dimension they are reducing over is dim 0 but the physical dim is dim 1 +// (the first non-batch dimension) + +// Forward declared; see NOTE: [What is a VmapPhysicalView?] +struct VmapPhysicalView; + +// Most PyTorch operators take 4 or fewer inputs. +constexpr int64_t kVmapTransformStaticInputSize = 4; +using VmapPhysicalViewVec = + SmallVector; + +// Pytorch generally advertises good performance for <= 5 dims. +// (see ATen/core/DimVector.h). We add a few extra dims (~3) for vmap +// dimensions to get 8. Adjust this number as necessary +constexpr int64_t kVmapStaticDimVecSize = 8; +using VmapDimVector = SmallVector; +using VmapSymDimVector = SmallVector; + +// NOTE: [What is an VmapTransform?] +// An *VmapTransform* converts logical views of tensors to physical views. +// +// Batching rules use VmapTransforms to convert logical arguments to +// physical arguments, then call one or more at:: operator that handles the +// physical arguments, and then converts the physical result back to a logical +// argument. + +// VmapTransform for operators that take tensors with multiple batch dims. +// Given one or more logical views on Tensors, `logicalToPhysical` +// permutes all of the batch dims to the front of the tensor, aligns +// and expands the batch dims to match each other (according to their `level`), +// and returns a VmapPhysicalView on the tensor(s). +struct TORCH_API MultiBatchVmapTransform { + static VmapPhysicalView logicalToPhysical(const Tensor& logical_tensor); + static VmapPhysicalViewVec logicalToPhysical(ITensorListRef logical_tensors); +}; + +// VmapTransform for operators that broadcast all inputs. +// Given some logical views on Tensors, `logicalToPhysical`: +// - permutes all of the batch dims to the front of the tensors +// - aligns all the batch dims to the collective levels of all of the tensors. +// If a tensor does not have a batch dim for a vmap level, then it receives +// a size-one dimension for said level. +// - aligns the non-batch dims to have the same dimensionality, adding extra +// size-1 dimensions in between the batch dimensions and the non-batch +// dimensions so that the batch dimensions are lined up from the right. +// +// For example: given inputs of size (B, 2) and (B, 3, 2) where B is the batch +// dimension, BroadcastingVmapTransform returns VmapPhysicalViews that wrap +// tensors of size (B, 1, 2) and (B, 3, 2). +// +// Given inputs of size (B, 2) and (2,), BroadcastingVmapTransform returns +// VmapPhysicalViews wrapping tensors of size (B, 2) and (1, 2). We don't +// actually *need* to return a tensor of size (1, 2) for the second tensor +// because the broadcasting operation takes care of that for us, but we do +// it anyways to keep things simple. +struct TORCH_API BroadcastingVmapTransform { + static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors); +}; + +// Forward declared, if you're reading this file head to toe, don't worry about +// it yet. +struct VmapPhysicalToLogicalMap; + +// NOTE: [What is a VmapPhysicalView?] +// VmapPhysicalView represents a physical view on a Tensor. +// +// One can use it to further convert logical dimension indices, logical shapes, +// and more to their physical variants, or convert a new (physical) tensor into +// a logical BatchedTensor. 
(TODO(rzou): some of these are not yet implemented). +// +// VmapPhysicalView stores a physical tensor with all of its batch dimensions at +// the front and some levels that correspond to said batch dimensions. +// +// The levels bitset specifies which vmap levels correspond to the batch +// dimensions at the front of the tensor. In particular, the number of set bits +// corresponds to the number of batch dimensions on `tensor` and the rightmost +// bit of `levels` specifies the maximum number of nested vmaps we are in at +// this point in time. +// For example, given: +// physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5, 6), levels={1, 3}) +// +// Rightmost bit of `levels` is 3 indicating the number of nested vmaps less +// than or equal to 3. +// bitset: 010100 +// ^ +// | +// levels: 012345 +struct TORCH_API VmapPhysicalView { + VmapPhysicalView(Tensor&& tensor, std::bitset levels) + : levels_(levels), tensor_(std::move(tensor)) { + TORCH_INTERNAL_ASSERT(!isBatchedTensor(tensor_)); + } + + Tensor& tensor() { + return tensor_; + } + const Tensor& tensor() const { + return tensor_; + } + + // Maps logical dim indices to physical dim indices. Also does dim wrapping. + // + // For example, given: + // physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5), levels={1, 3}) + // + // Then physical_view.getPhysicalDims({0, 1}) returns {2, 3}. + // This is because the size of levels tell us that the first two dimensions + // of `tensor_` are batch dimensions, so a logical dim of `n` is actually + // a physical dim of `n + 2`. + VmapDimVector getPhysicalDims(OptionalIntArrayRef logical_dims) const; + int64_t getPhysicalDim(int64_t logical_dim) const; + + // Returns a VmapPhysicalToLogicalMap object. This can be used for + // mapping a physical tensor to a new logical tensor (BatchedTensor) + VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const; + + // Maps a logical shape to a physical shape by pre-pending the batch + // sizes to the logical shape. + VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const; + + int64_t numBatchDims() const; + + private: + int64_t numLogicalDims() const; + + std::bitset levels_; + Tensor tensor_; +}; + +// Convenience struct used for mapping a physical tensor (a non-BatchedTensor) +// to a logical one (BatchedTensor). It holds some levels that are used to do +// the mapping and assumes that the batch dimensions in the physical tensor all +// occur at the front of the tensor. +struct TORCH_API VmapPhysicalToLogicalMap { + VmapPhysicalToLogicalMap(std::bitset levels) + : levels_(levels) {} + + // Maps a physical tensor to a new logical tensor (BatchedTensor). + // Assumes that all of the "batch dimensions" are at the front + // of the physical tensor. For example, given: + // - x = rank-4 Tensor with size 2, 3, 5, 7 + // - levels = (2, 4) + // Returns: + // - BatchedTensor(x, bdims=[(dim=0,lvl=2), (dim=1, lvl=4)]) + Tensor apply(const Tensor& physical_tensor) const; + + // Given a vector of physical tensors, + // 1. maps each tensor to a new logical tensor. Assumes that all of the + // "batch dimensions" are at the front of the physical tensors. + // 2. stores the new logical tensors back into the passed-in vector. This is + // to avoid additional dynamic allocations. 
+ void applyInplace(std::vector& physical_tensors) const; + + std::bitset levels_; +}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/MapAllocator.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/MapAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..db1258beee525cee1788c888f366363f96c74c83 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/MapAllocator.h @@ -0,0 +1,143 @@ +#pragma once + +#include +#include + +namespace at { + +enum MappedAllocatorModes { + ALLOCATOR_MAPPED_SHARED = 1, + ALLOCATOR_MAPPED_SHAREDMEM = 2, + ALLOCATOR_MAPPED_EXCLUSIVE = 4, + ALLOCATOR_MAPPED_NOCREATE = 8, + ALLOCATOR_MAPPED_KEEPFD = 16, + ALLOCATOR_MAPPED_FROMFD = 32, + ALLOCATOR_MAPPED_UNLINK = 64 +}; + +// Sentinel value/type to help distinguish the file descriptor constructor from +// the non-file descriptor constructor +enum WithFd { WITH_FD }; + +TORCH_API std::string NewProcessWideShmHandle(); + +class TORCH_API MapAllocator { + public: + MapAllocator(c10::string_view filename, int flags, size_t size); + MapAllocator( + WithFd, + c10::string_view filename, + int fd, + int flags, + size_t size); + MapAllocator(const MapAllocator&) = delete; + MapAllocator& operator=(const MapAllocator&) = delete; + MapAllocator(MapAllocator&&) = delete; + MapAllocator& operator=(MapAllocator&&) = delete; + + const char* filename() const { + return filename_.c_str(); + } + int fd() const { +#ifdef _WIN32 + TORCH_CHECK(false, "MapAllocator::fd() is unsupported on Windows"); +#else + return fd_; +#endif + } + ptrdiff_t size() const { + return size_; + } + // Return a pointer to the actual data for this allocator + // (in the case of the refcounted allocator, this is offset + // from the base pointer.) + virtual void* data() const { + return base_ptr_; + } + + int flags() const { + return flags_; + } + + static MapAllocator* fromDataPtr(const at::DataPtr&); + static at::DataPtr makeDataPtr( + c10::string_view filename, + int flags, + size_t size, + size_t* actual_size_out); + static at::DataPtr makeDataPtr( + WithFd, + const char* filename, + int fd, + int flags, + size_t size, + size_t* actual_size_out); + + // Closes the data. Helps us avoid destructor shenanigans + virtual void close(); + + // This is very dangerous. 
You have to redefine this destructor for each + // subclass + virtual ~MapAllocator(); + + protected: + bool closed_ = false; + std::string filename_; + int flags_ = 0; + ptrdiff_t size_; /* mapped size */ +#ifdef _WIN32 + void* handle_; + void* event_; + std::string eventname_; +#else + int fd_ = -1; +#endif + void* base_ptr_ = nullptr; +}; + +// Base-from-member idiom +struct TORCH_API RefcountedMapAllocatorArgCheck { + RefcountedMapAllocatorArgCheck(int flags); +}; + +class TORCH_API RefcountedMapAllocator : private RefcountedMapAllocatorArgCheck, + public MapAllocator { + public: + RefcountedMapAllocator(const char* filename, int flags, size_t size); + RefcountedMapAllocator( + WithFd, + const char* filename, + int fd, + int flags, + size_t size); + + static RefcountedMapAllocator* fromDataPtr(const at::DataPtr&); + static at::DataPtr makeDataPtr( + const char* filename, + int flags, + size_t size, + size_t* actual_size_out); + static at::DataPtr makeDataPtr( + WithFd, + const char* filename, + int fd, + int flags, + size_t size, + size_t* actual_size_out); + + void* data() const override; + + void incref(); + int decref(); + void close() override; + + ~RefcountedMapAllocator() override { + RefcountedMapAllocator::close(); + } + + protected: + void checkFlags(); + void initializeAlloc(); +}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/MatrixRef.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/MatrixRef.h new file mode 100644 index 0000000000000000000000000000000000000000..74a20a6870901a3b44a346cdf785ac8e25d15ebd --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/MatrixRef.h @@ -0,0 +1,107 @@ +#pragma once +#include +#include + +namespace at { +/// MatrixRef - Like an ArrayRef, but with an extra recorded strides so that +/// we can easily view it as a multidimensional array. +/// +/// Like ArrayRef, this class does not own the underlying data, it is expected +/// to be used in situations where the data resides in some other buffer. +/// +/// This is intended to be trivially copyable, so it should be passed by +/// value. +/// +/// For now, 2D only (so the copies are actually cheap, without having +/// to write a SmallVector class) and contiguous only (so we can +/// return non-strided ArrayRef on index). +/// +/// P.S. dimension 0 indexes rows, dimension 1 indexes columns +template +class MatrixRef { + public: + typedef size_t size_type; + + private: + /// Underlying ArrayRef + ArrayRef arr; + + /// Stride of dim 0 (outer dimension) + size_type stride0; + + // Stride of dim 1 is assumed to be 1 + + public: + /// Construct an empty Matrixref. + /*implicit*/ MatrixRef() : arr(nullptr), stride0(0) {} + + /// Construct an MatrixRef from an ArrayRef and outer stride. + /*implicit*/ MatrixRef(ArrayRef arr, size_type stride0) + : arr(arr), stride0(stride0) { + TORCH_CHECK( + arr.size() % stride0 == 0, + "MatrixRef: ArrayRef size ", + arr.size(), + " not divisible by stride ", + stride0) + } + + /// @} + /// @name Simple Operations + /// @{ + + /// empty - Check if the matrix is empty. 
+ bool empty() const { + return arr.empty(); + } + + const T* data() const { + return arr.data(); + } + + /// size - Get size a dimension + size_t size(size_t dim) const { + if (dim == 0) { + return arr.size() / stride0; + } else if (dim == 1) { + return stride0; + } else { + TORCH_CHECK( + 0, "MatrixRef: out of bounds dimension ", dim, "; expected 0 or 1"); + } + } + + size_t numel() const { + return arr.size(); + } + + /// equals - Check for element-wise equality. + bool equals(MatrixRef RHS) const { + return stride0 == RHS.stride0 && arr.equals(RHS.arr); + } + + /// @} + /// @name Operator Overloads + /// @{ + ArrayRef operator[](size_t Index) const { + return arr.slice(Index * stride0, stride0); + } + + /// Disallow accidental assignment from a temporary. + /// + /// The declaration here is extra complicated so that "arrayRef = {}" + /// continues to select the move assignment operator. + template + std::enable_if_t, MatrixRef>& operator=( + U&& Temporary) = delete; + + /// Disallow accidental assignment from a temporary. + /// + /// The declaration here is extra complicated so that "arrayRef = {}" + /// continues to select the move assignment operator. + template + std::enable_if_t, MatrixRef>& operator=( + std::initializer_list) = delete; +}; + +} // end namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/MetaFunctions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/MetaFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..02912209e287d1483e617c108872a8ddb0a8c80c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/MetaFunctions.h @@ -0,0 +1,29 @@ +#include + +// TODO Undo all logic introduced for Note [Avoiding Include Cycles In Static Dispatch] +// Code introduced to avoid cyclic dependency in static dispatch is no longer +// needed as static dispatch logic is moved from TensorBody.h, which caused cycles in the first place, +// to Operators.cpp for supporting multiple backends with multiple kernels. +// +// Note [Avoiding Include Cycles In Static Dispatch] +// In order to avoid #include cycles in the static dispatch build, we've carefully split out +// the static function definition files into {DispatchKey}Functions.h and {DispatchKey}Functions_inl.h. +// +// Without this split, the include cycle looks like TensorBody.h -> CPUFunctions.h -> TensorBody.h. +// - TensorBody.h #includes CPUFunctions.h in the static dispatch build, because the tensor methods +// all need to call into the fastpath C++ API defined in CPUFunctions.h. The methods are also all +// directly inlined into TensorBody.h. +// - CPUFunctions.h #includes TensorBody.h because it contains function declarations for the entire C++ API, +// which include functions that have defaultable std::optional arguments. +// That requires knowing the full Tensor class definition. +// +// We break the cycle by doing the following: +// - Split out CPUFunction.h into two files: CPUFunctions.h and CPUFunctions_inl.h +// - CPUFunction.h is a dummy file that just includes the Tensor class and includes CPUFunctions_inl., +// - CPUFunctions_inl.h includes everything else +// - (only in the static dispatch build) TensorBody.h makes sure to finish defining the Tensor class, +// and then it includes CPUFunctions_inl.h. +// - All other files that want the cpu fastpath functions can include CPUFunctions.h directly. 
+// - This also means that static dispatch build, CPUFunctions.h only needs to +// #include TensorBody.h, and it will automatically bring in CPUFunctions_inl.h. +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/MethodOperators.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/MethodOperators.h new file mode 100644 index 0000000000000000000000000000000000000000..bae6d022feb430980f670f6efbf9465c8e99350e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/MethodOperators.h @@ -0,0 +1,443 @@ +#pragma once + +// @generated by torchgen/gen.py from MethodOperators.h + +#ifdef TORCH_ASSERT_NO_OPERATORS +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if your change would be better placed in \ + another file, or if a more specific header might achieve the same goal. \ + See NOTE: [Tensor vs. TensorBase] +#endif + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace _ops { + +} // namespace _ops +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensor.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..a7606b0a668a43800b89755af1371551909b23d5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensor.h @@ -0,0 +1 @@ +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensorUtils.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensorUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..966145ae23fa234a834a0ea99c19995f83c5c07a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/NamedTensorUtils.h @@ -0,0 +1,214 @@ +#pragma once +#include +#include +#include + +#include +#include + +namespace at { + +using NameVector = SmallVector; + +inline bool has_names(const ITensorListRef& tensors) { + return std::any_of(tensors.begin(), tensors.end(), [](const Tensor& t) { + return t.has_names(); + }); +} + +// Converts dim to an positional index. Errors if `dim` cannot be used to +// refer to any dimension of tensor. +TORCH_API int64_t dimname_to_position(const Tensor& tensor, Dimname dim); +TORCH_API std::vector dimnames_to_positions( + const Tensor& tensor, + DimnameList dims); + +// Unifies two DimnameList to produce a third. This is useful for implementing +// the named inference rule for binary broadcasting operations like add. 
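+// For example (illustrative): unifying {"N", "C"} with {"C"} from the right
+// yields {"N", "C"}, while unifying {"N", "C"} with {"H"} is an error because
+// the rightmost names do not match.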
+// +// There are three main constraints: +// 1) Check matching: Names must match positionally from the right. +// 2) Check misaligned: If a name `n` is in `names`, then it must appear at +// the same index from the right in other. +// 3) The output names are obtained by unifying the names individually from the +// right. +TORCH_API std::vector unify_from_right( + DimnameList names, + DimnameList other, + const char* action = "broadcast"); + +[[noreturn]] inline void reportNYIDimnameOverload(const char* op_name) { + TORCH_CHECK( + false, + op_name, + ": You passed a dimname (string) to this op in place of a dimension " + "index but it does not yet support this behavior. Please pass a dimension " + "index to work around this."); +} + +// [NOTE] Writing name inference rules +// +// Operators that support named tensors are either composed of operations that +// support named tensors or implement some name inference rule. An op that +// implements its own name inference rule generally looks like the following: +// +// Tensor op(...) { +// perform_shape_checks(...); +// # (1) +// auto maybe_outnames = compute_outnames(...); +// auto result = [&]() { +// NoNamesGuard guard; +// return op_impl(...); +// }(); +// # (2) +// propagate_names_if_nonempty(result, maybe_outnames); +// +// Each op has (1) a compute outnames step and (2) a propagate names step. +// +// compute_outnames is responsible for checking that input names match and +// determining what the output names should be. It returns either: +// - {} (if the inputs tensors are all unnamed) +// - non-empty outnames. +// +// propagate_names_if_nonempty propagates the outnames if they exist to the +// result tensors. +// +// The {} case is an optimization; if the user does not use named tensors they +// pay no perf cost for it. + +namespace namedinference { + +const Tensor& propagate_names_if_present_and_nonempty( + const Tensor& result, + std::optional maybe_names, + bool validate_names = false); +// Propagates `names` to `result` if `names` is not empty. +// `names` can be empty; see [NOTE] Writing name inference rules +// If `names` is not empty, `names.size()` should equal `result.dim()`. +// When in doubt, use this overload instead of the others. +TORCH_API const Tensor& propagate_names_if_nonempty( + const Tensor& result, + DimnameList maybe_names, + bool validate_names = false); + +// Propagates `names` to `result`. Only use this if we are certain that there +// are names to propagate (that names is not empty). +TORCH_API const Tensor& propagate_names( + const Tensor& result, + DimnameList names, + bool validate_names = false); + +// Propagates all names from src to result. +TORCH_API void propagate_names(const Tensor& result, const Tensor& src); + +// Propagates all names except for those at the excluded_idxs. +TORCH_API void propagate_names_except( + const Tensor& result, + const Tensor& src, + IntArrayRef excluded_idxs); + +// Used for reduction ops that have a `keepdim` arg. 
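+// For example (illustrative): reducing a tensor named ("N", "C", "H") over
+// dim 1 with keepdim=false propagates ("N", "H") to the result.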
+TORCH_API void propagate_names_for_reduction( + const Tensor& result, + const Tensor& src, + IntArrayRef excluded_idxs, + bool keepdim); + +TORCH_API void propagate_names_for_expand( + const Tensor& result, + const Tensor& self); + +TORCH_API std::vector compute_cat_outnames( + const MaterializedITensorListRef& tensors); + +TORCH_API std::vector compute_broadcast_outnames( + const Tensor& self, + const Tensor& other); + +TORCH_API std::vector broadcast_to_outnames( + const Tensor& tensor, + const Tensor& reference_tensor, + const char* op_name); + +TORCH_API std::vector compute_matmul_outnames( + const Tensor& self, + const Tensor& other); + +TORCH_API std::vector compute_cdist_outnames( + const Tensor& self, + const Tensor& other); + +TORCH_API std::vector compute_bmm_outnames( + const Tensor& result, + const Tensor& self, + const Tensor& other); + +TORCH_API std::vector compute_squeeze_outnames(const Tensor& tensor); +TORCH_API std::vector compute_squeeze_outnames( + const Tensor& tensor, + std::bitset dims); + +std::vector compute_diagonal_outnames( + const Tensor& tensor, + int64_t dim1, + int64_t dim2); + +// TensorImpl* overloads for Legacy TH/THC code. Use these sparingly. + +TORCH_API TensorImpl* propagate_names_if_nonempty( + TensorImpl* result, + DimnameList maybe_names, + bool validate_names = false); + +TORCH_API TensorImpl* propagate_names( + TensorImpl* result, + DimnameList names, + bool validate_names = false); + +TORCH_API void propagate_names(TensorImpl* result, /*const */ TensorImpl* src); + +TORCH_API inline void propagate_names( + const TensorBase& result, + DimnameList names, + bool validate_names = false) { + propagate_names(result.unsafeGetTensorImpl(), names, validate_names); +} + +TORCH_API inline void propagate_names_if_nonempty( + const TensorBase& result, + DimnameList names, + bool validate_names = false) { + propagate_names_if_nonempty( + result.unsafeGetTensorImpl(), names, validate_names); +} + +TORCH_API inline void propagate_names( + const TensorBase& result, + const TensorBase& src) { + propagate_names(result.unsafeGetTensorImpl(), src.unsafeGetTensorImpl()); +} + +// result = m1 @ m2 + bias +TORCH_API std::vector propagate_names_for_addmm( + const Tensor& m1, + const Tensor& m2, + const Tensor& bias); + +TORCH_API std::vector propagate_names_for_addmv( + const Tensor& mat, + const Tensor& vec, + const Tensor& bias); + +TORCH_API void check_names_for_dot(TensorImpl* vec1, TensorImpl* vec2); + +TORCH_API std::vector compute_baddbmm_outnames( + const Tensor& result, + const Tensor& self, + const Tensor& other, + const Tensor& bias); + +TORCH_API bool are_names_equal(TensorImpl* self, TensorImpl* other); + +} // namespace namedinference + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/NativeFunctions.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/NativeFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..c5e8c21c22ba034de5717f2074af1f1cf949dc8f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/NativeFunctions.h @@ -0,0 +1,1344 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunctions.h + +#ifdef TORCH_ASSERT_NO_OPERATORS +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if your change would be better placed in \ + another file, or if a more specific header might achieve the same goal. \ + See NOTE: [Tensor vs. 
TensorBase] +#endif + +#if defined(AT_PER_OPERATOR_HEADERS) && defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS) +#error This change adds a dependency on all pytorch operators, meaning the \ + file will need to be re-compiled every time an operator is changed or added. \ + Consider including a specific operator from \ + and see NOTE [TORCH_ASSERT_ONLY_METHOD_OPERATORS]. +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/NestedTensorImpl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/NestedTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..697969edbbd44fda736fe9ff26b9902d96454a78 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/NestedTensorImpl.h @@ -0,0 +1,286 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at::native { +struct NestedTensorImpl; +inline bool nested_tensor_impl_is_contiguous(const 
NestedTensorImpl* nt); +int64_t get_numel_from_nested_size_tensor(const at::Tensor& tensor); +at::Tensor construct_nested_strides(const at::Tensor& nested_size); +at::Tensor construct_offsets(const at::Tensor& nested_size); + +struct TORCH_API NestedTensorImpl : public c10::TensorImpl { + explicit NestedTensorImpl( + Storage storage, + c10::DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + at::Tensor nested_sizes, + at::Tensor nested_strides, + at::Tensor storage_offsets); + + explicit NestedTensorImpl( + const at::Tensor& buffer, + at::Tensor nested_sizes, + at::Tensor nested_strides, + at::Tensor storage_offsets); + // assume contiguous, `nested_strides` and `offsets` + // can be infered from `nested_sizes` + explicit NestedTensorImpl( + const at::Tensor& buffer, + const at::Tensor& nested_sizes); + + // This constructor is used creating view tensors from nested tensors + explicit NestedTensorImpl( + c10::TensorImpl::ImplType impl_type, + const at::Tensor& base_tensor, + at::Tensor nested_sizes, + at::Tensor nested_strides, + at::Tensor storage_offsets); + + // TODO: don't expose private implementation details like this; in + // particular, resizing this tensor will mess up our dim() and + // callers cannot fix it. + const Tensor& get_nested_sizes() const { + return nested_sizes_; + } + // TODO: don't expose private implementation details like this + const Tensor& get_nested_strides() const { + return nested_strides_; + } + const Tensor& get_storage_offsets() const { + return storage_offsets_; + } + // Returns nullopt if the ith dimension is irregular. The ith dimension + // of a NestedTensor is regular if the unbound tensors match in + // size at the (i-1)th dimension. + std::optional opt_size(int64_t d) const; + + int64_t size(int64_t d) const { + std::optional optional_size = this->opt_size(d); + TORCH_CHECK( + optional_size.has_value(), + "Given dimension ", + d, + " is irregular and does not have a size."); + return *optional_size; + } + /** + * Return a view of the nested tensor as a 1 dimensional contiguous tensor. + * + * The buffer tensor created by this function shares the same storage_impl as + * the original nested tensor, and therefore can be seen as a view. + * + * @return A newly constructed view tensor + */ + at::Tensor get_buffer() const { + TORCH_CHECK( + nested_tensor_impl_is_contiguous(this), + "NestedTensor must be contiguous to get buffer."); + return get_unsafe_storage_as_tensor(); + } + /** + * If possible use get_buffer() instead. This function returns the storage + * as a tensor directly, which is not safe to use in general. If using this + * function, The caller must ensure to account for nested_sizes, + * nested_strides and storage_offsets. 
+ * + * @return A newly constructed view tensor + */ + at::Tensor get_unsafe_storage_as_tensor() const { + auto buffer_key_set_ = generate_buffer_key_set(); + const auto buffer_size = get_buffer_size(); + auto buffer_tensor_impl = c10::make_intrusive( + c10::TensorImpl::VIEW, Storage(storage_), buffer_key_set_, data_type_); + buffer_tensor_impl->set_sizes_contiguous( + c10::makeArrayRef(static_cast(buffer_size))); + return Tensor(buffer_tensor_impl); + } + + size_t get_buffer_size() const { + return storage_.nbytes() / data_type_.itemsize(); + } + + protected: + const char* tensorimpl_type_name() const override; + + // TODO: numel_custom and is_contiguous_custom can be profitably overridden + // with real implementations + int64_t numel_custom() const override; + c10::SymInt sym_numel_custom() const override; + bool is_contiguous_custom(MemoryFormat) const override; + int64_t size_custom(int64_t d) const override { + return this->size(d); + } + c10::SymInt sym_size_custom(int64_t d) const override { + return c10::SymInt{this->size(d)}; + } + IntArrayRef sizes_custom() const override; + c10::SymIntArrayRef sym_sizes_custom() const override; + IntArrayRef strides_custom() const override; + c10::SymIntArrayRef sym_strides_custom() const override; + + // this one is real + int64_t dim_custom() const override; + + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override; + + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override; + + void shallow_copy_from(const c10::intrusive_ptr& impl) override { + copy_tensor_metadata( + /*src_impl=*/impl.get(), + /*dest_impl=*/this, + /*version_counter=*/version_counter(), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change()); + } + + private: + // Must be called after any changes to our dim() to sync the state + // to TensorImpl. + void refresh_dim(); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::Tensor nested_sizes_, nested_strides_; + // The starting positions of the underlying tensors in contiguous buffer + // i.e. the buffer memory offsets to get the underlying tensors + // The reason to keep this metadata is that, without strong enough constraint + // it cannot be derived from `nested_sizes_` + // and `nested_strides_`: + // 1. when buffer has blanks, e.g. [tensor1, blank, tensor2] + // this can happen e.g. after slicing a nested tensor + // 2. when multiple tensors share a same memory + // 3. when the nesting ordering is changed, e.g. [tensor1, tensor3, tensor2] + // Some strong enough constraints are: + // 1. every underlying tensor is contiguous in memory + // && nesting in ascending order + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::Tensor storage_offsets_; + // NOTE: -1 here means the size is missing + // Optional to allow it to be computed lazily from nested. + // TODO: maybe we can remove this metadata since + // we can compute it from `nested_sizes_` + mutable std::optional> opt_sizes_; + + template + c10::intrusive_ptr shallow_copy_and_detach_core( + VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const; + + /** + * Generates a non-nested key_set from a nested tensor. 
+ * + * For many nested tensor kernel implementations a buffer tensor + * is generated and redispatched to a non-nested kernel this function + * generates the key set used by that buffer tensor + * + * @return Appropriate key set for non-nested tensor + */ + inline c10::DispatchKeySet generate_buffer_key_set() const { + auto buffer_key_set = this->key_set(); + const bool Autograd = buffer_key_set.has_any(c10::autograd_dispatch_keyset); + // Remove nested tensor specific keys + buffer_key_set = buffer_key_set - + c10::DispatchKeySet{ + c10::DispatchKey::NestedTensor, + c10::DispatchKey::AutogradNestedTensor}; + + // Add dense tensor specific keys + buffer_key_set = + buffer_key_set | c10::DispatchKeySet{c10::DispatchKey::Dense}; + buffer_key_set = Autograd + ? c10::DispatchKeySet{c10::DispatchKey::Autograd} | buffer_key_set + : buffer_key_set; + + return buffer_key_set; + } +}; + +inline NestedTensorImpl* get_nested_tensor_impl_or_null( + const at::Tensor& tensor) { + if (tensor.is_nested()) { + return static_cast(tensor.unsafeGetTensorImpl()); + } + return nullptr; +} + +inline NestedTensorImpl* get_nested_tensor_impl(const at::Tensor& tensor) { + TORCH_CHECK( + tensor.is_nested(), "get_nested_tensor_impl requires a NestedTensor."); + return static_cast(tensor.unsafeGetTensorImpl()); +} + +inline bool nested_tensor_impl_is_contiguous(const NestedTensorImpl* nt) { + int64_t ntensors = nt->size(0); + if (ntensors == 0) { + return true; + } + const Tensor &sizemat = nt->get_nested_sizes(), + &stridemat = nt->get_nested_strides(); + const int64_t* offsets_ptr = + nt->get_storage_offsets().const_data_ptr(); + int64_t orig_dim = sizemat.size(1); + // nesting scalars + if (orig_dim == 0) { + // each scalar must be contiguous + // if there is blank memory between underlying scalars + for (int64_t i = 0; i < ntensors; i++) { + if (offsets_ptr[i] != i) { + return false; + } + } + } + // nesting tensors + else { + // if any underlying tensor is non-contiguous + const int64_t *sizemat_ptr = sizemat.const_data_ptr(), + *stridemat_ptr = stridemat.const_data_ptr(); + for (int64_t i = 0; i < ntensors; i++) { + if (stridemat_ptr[orig_dim - 1] != 1) { + return false; + } + int64_t product = sizemat_ptr[orig_dim - 1]; + for (int64_t j = orig_dim - 2; j >= 0; j--) { + if (stridemat_ptr[j] != product) { + return false; + } + product *= sizemat_ptr[j]; + } + sizemat_ptr += orig_dim; + stridemat_ptr += orig_dim; + } + // if there is blank memory between underlying tensors + if (offsets_ptr[0] != 0) { + return false; + } + sizemat_ptr = sizemat.const_data_ptr(); + stridemat_ptr = stridemat.const_data_ptr(); + for (int64_t i = 1; i < ntensors; i++) { + if (offsets_ptr[i] != + offsets_ptr[i - 1] + *sizemat_ptr * *stridemat_ptr) { + return false; + } + sizemat_ptr += orig_dim; + stridemat_ptr += orig_dim; + } + } + // everything is fine + return true; +} + +inline const at::Tensor& get_nested_sizes(const at::Tensor& tensor) { + return get_nested_tensor_impl(tensor)->get_nested_sizes(); +} + +} // namespace at::native diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h new file mode 100644 index 0000000000000000000000000000000000000000..d00195b07e490208db6aa9a015bca79b0cc1c83f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +// For FP16 or BFloat16 
inputs, ops should perform internal math in FP32. +template +struct OpMathType { + using type = scalar_t; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType { + using type = float; +}; +template <> +struct OpMathType> { + using type = c10::complex; +}; + +template +using opmath_type = typename OpMathType::type; + +namespace { + +inline c10::ScalarType toOpMathType(const c10::ScalarType type) { + switch (type) { +#define DEFINE_CASE(scalar_t, TypeNum) \ + case ScalarType::TypeNum: \ + return CppTypeToScalarType>::value; + + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type); + } +} + +} // namespace + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/PadNd.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/PadNd.h new file mode 100644 index 0000000000000000000000000000000000000000..573d1a7b88ab7367858df90b6adfebfa9b97d5e9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/PadNd.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace at { + +enum class padding_mode { + reflect, + replicate, + circular, + constant, +}; + +static inline c10::string_view padding_mode_string(padding_mode m) { + switch (m) { + case padding_mode::reflect: + return "reflect"; + case padding_mode::replicate: + return "replicate"; + case padding_mode::circular: + return "circular"; + case padding_mode::constant: + return "constant"; + } + TORCH_CHECK(false, "Invalid padding mode (", static_cast(m), ")"); +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Parallel.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Parallel.h new file mode 100644 index 0000000000000000000000000000000000000000..966e29c0289f371798b11e096973874841e1e8f1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Parallel.h @@ -0,0 +1,158 @@ +#pragma once +#include +#include +#include +#include + +namespace at { + +inline int64_t divup(int64_t x, int64_t y) { + return (x + y - 1) / y; +} + +// Called during new thread initialization +TORCH_API void init_num_threads(); + +// Sets the number of threads to be used in parallel region +TORCH_API void set_num_threads(int); + +// Returns the maximum number of threads that may be used in a parallel region +TORCH_API int get_num_threads(); + +// Returns the current thread number (starting from 0) +// in the current parallel region, or 0 in the sequential region +TORCH_API int get_thread_num(); + +// Checks whether the code runs in parallel region +TORCH_API bool in_parallel_region(); + +namespace internal { + +// Initialise num_threads lazily at first parallel call +inline void lazy_init_num_threads() { + thread_local bool init = false; + if (C10_UNLIKELY(!init)) { + at::init_num_threads(); + init = true; + } +} + +TORCH_API void set_thread_num(int); + +class TORCH_API ThreadIdGuard { + public: + ThreadIdGuard(int new_id) : old_id_(at::get_thread_num()) { + set_thread_num(new_id); + } + + ~ThreadIdGuard() { + set_thread_num(old_id_); + } + + private: + int old_id_; +}; + +} // namespace internal + +/* +parallel_for + +begin: index at which to start applying user function + +end: index at 
which to stop applying user function + +grain_size: number of elements per chunk. impacts the degree of parallelization + +f: user function applied in parallel to the chunks, signature: + void f(int64_t begin, int64_t end) + +Warning: parallel_for does NOT copy thread local +states from the current thread to the worker threads. +This means for example that Tensor operations CANNOT be used in the +body of your function, only data pointers. +*/ +template +inline void parallel_for( + const int64_t begin, + const int64_t end, + const int64_t grain_size, + const F& f); + +/* +parallel_reduce + +begin: index at which to start applying reduction + +end: index at which to stop applying reduction + +grain_size: number of elements per chunk. impacts number of elements in +intermediate results tensor and degree of parallelization. + +ident: identity for binary combination function sf. sf(ident, x) needs to return +x. + +f: function for reduction over a chunk. f needs to be of signature scalar_t +f(int64_t partial_begin, int64_t partial_end, scalar_t identifiy) + +sf: function to combine two partial results. sf needs to be of signature +scalar_t sf(scalar_t x, scalar_t y) + +For example, you might have a tensor of 10000 entires and want to sum together +all the elements. Parallel_reduce with a grain_size of 2500 will then allocate +an intermediate result tensor with 4 elements. Then it will execute the function +"f" you provide and pass the beginning and end index of these chunks, so +0-2499, 2500-4999, etc. and the combination identity. It will then write out +the result from each of these chunks into the intermediate result tensor. After +that it'll reduce the partial results from each chunk into a single number using +the combination function sf and the identity ident. For a total summation this +would be "+" and 0 respectively. This is similar to tbb's approach [1], where +you need to provide a function to accumulate a subrange, a function to combine +two partial results and an identity. + +Warning: parallel_reduce does NOT copy thread local +states from the current thread to the worker threads. +This means for example that Tensor operations CANNOT be used in the +body of your function, only data pointers. 
+ +[1] https://software.intel.com/en-us/node/506154 +*/ +template +inline scalar_t parallel_reduce( + const int64_t begin, + const int64_t end, + const int64_t grain_size, + const scalar_t ident, + const F& f, + const SF& sf); + +// Returns a detailed string describing parallelization settings +TORCH_API std::string get_parallel_info(); + +// Sets number of threads used for inter-op parallelism +TORCH_API void set_num_interop_threads(int); + +// Returns the number of threads used for inter-op parallelism +TORCH_API int get_num_interop_threads(); + +// Launches inter-op parallel task +TORCH_API void launch(std::function func); +namespace internal { +void launch_no_thread_state(std::function fn); +} // namespace internal + +// Launches intra-op parallel task +TORCH_API void intraop_launch(std::function func); + +// Returns number of intra-op threads used by default +TORCH_API int intraop_default_num_threads(); + +} // namespace at + +#if AT_PARALLEL_OPENMP +#include // IWYU pragma: keep +#elif AT_PARALLEL_NATIVE +#include // IWYU pragma: keep +#endif + +#include // IWYU pragma: keep diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/ParallelFuture.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/ParallelFuture.h new file mode 100644 index 0000000000000000000000000000000000000000..042cd92da19345d7523671ca75da7279d13062a9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/ParallelFuture.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include +#include + +namespace at { + +// Launches intra-op parallel task, returns a future +TORCH_API c10::intrusive_ptr intraop_launch_future( + std::function func); + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h new file mode 100644 index 0000000000000000000000000000000000000000..129e4f0c8ef58255fc2080550df2fcb947fcacd5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h @@ -0,0 +1,3133 @@ +// This file contains all native_functions that can be registered to +// and the schema string that they should be registered with + +Tensor _cast_Byte(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Byte(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Char(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Char(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Double(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Double(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Float(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Float(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Int(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Int(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Long(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Long(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Short(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Short(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _cast_Half(const Tensor & self, 
bool non_blocking); // {"schema": "aten::_cast_Half(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"} +void _backward(const Tensor & self, TensorList inputs, const ::std::optional & gradient, ::std::optional retain_graph, bool create_graph); // {"schema": "aten::_backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()", "dispatch": "False", "default": "True"} +void set_data(Tensor & self, const Tensor & new_data); // {"schema": "aten::set_data(Tensor(a!) self, Tensor new_data) -> ()", "dispatch": "False", "default": "True"} +Tensor data(const Tensor & self); // {"schema": "aten::data(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +bool is_leaf(const Tensor & self); // {"schema": "aten::is_leaf(Tensor self) -> bool", "dispatch": "False", "default": "True"} +int64_t output_nr(const Tensor & self); // {"schema": "aten::output_nr(Tensor self) -> int", "dispatch": "False", "default": "True"} +int64_t _version(const Tensor & self); // {"schema": "aten::_version(Tensor self) -> int", "dispatch": "False", "default": "True"} +Tensor & requires_grad_(Tensor & self, bool requires_grad); // {"schema": "aten::requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!)", "dispatch": "False", "default": "True"} +void retain_grad(Tensor & self); // {"schema": "aten::retain_grad(Tensor(a!) self) -> ()", "dispatch": "False", "default": "True"} +bool retains_grad(const Tensor & self); // {"schema": "aten::retains_grad(Tensor self) -> bool", "dispatch": "False", "default": "True"} +Tensor _fw_primal(const Tensor & self, int64_t level); // {"schema": "aten::_fw_primal(Tensor(a) self, int level) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor _make_dual(const Tensor & primal, const Tensor & tangent, int64_t level); // {"schema": "aten::_make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)", "dispatch": "True", "default": "True"} +::std::tuple _unpack_dual(const Tensor & dual, int64_t level); // {"schema": "aten::_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)", "dispatch": "False", "default": "True"} +Tensor _new_zeros_with_same_feature_meta(const Tensor & self, const Tensor & other, int64_t self_num_batch_dims); // {"schema": "aten::_new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor", "dispatch": "True", "default": "True"} +bool _has_same_storage_numel(const Tensor & self, const Tensor & other); // {"schema": "aten::_has_same_storage_numel(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "True"} +Tensor & rename_(Tensor & self, ::std::optional names); // {"schema": "aten::rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor rename(const Tensor & self, ::std::optional names); // {"schema": "aten::rename(Tensor(a) self, Dimname[]? 
names) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor align_to(const Tensor & self, DimnameList names); // {"schema": "aten::align_to(Tensor(a) self, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor align_to(const Tensor & self, DimnameList order, int64_t ellipsis_idx); // {"schema": "aten::align_to.ellipsis_idx(Tensor(a) self, Dimname[] order, int ellipsis_idx) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor align_as(const Tensor & self, const Tensor & other); // {"schema": "aten::align_as(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector align_tensors(TensorList tensors); // {"schema": "aten::align_tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +void _assert_async(const Tensor & self); // {"schema": "aten::_assert_async(Tensor self) -> ()", "dispatch": "True", "default": "False"} +void _assert_async(const Tensor & self, c10::string_view assert_msg); // {"schema": "aten::_assert_async.msg(Tensor self, str assert_msg) -> ()", "dispatch": "True", "default": "False"} +void _assert_scalar(const Scalar & self, c10::string_view assert_msg); // {"schema": "aten::_assert_scalar(Scalar self, str assert_msg) -> ()", "dispatch": "True", "default": "True"} +Tensor _functional_assert_scalar(const Scalar & self, c10::string_view assert_msg, const Tensor & dep_token); // {"schema": "aten::_functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _functional_assert_async(const Tensor & self, c10::string_view assert_msg, const Tensor & dep_token); // {"schema": "aten::_functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "False"} +void _assert_tensor_metadata(const Tensor & a, OptionalSymIntArrayRef size, OptionalSymIntArrayRef stride, ::std::optional dtype); // {"schema": "aten::_assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()", "dispatch": "False", "default": "True"} +void _print(c10::string_view s); // {"schema": "aten::_print(str s) -> ()", "dispatch": "True", "default": "True"} +void sym_constrain_range(const Scalar & size, ::std::optional min, ::std::optional max); // {"schema": "aten::sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()", "dispatch": "True", "default": "True"} +void sym_constrain_range_for_size(const Scalar & size, ::std::optional min, ::std::optional max); // {"schema": "aten::sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()", "dispatch": "True", "default": "True"} +Tensor _functional_sym_constrain_range(const Scalar & size, ::std::optional min, ::std::optional max, const Tensor & dep_token); // {"schema": "aten::_functional_sym_constrain_range(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _functional_sym_constrain_range_for_size(const Scalar & size, ::std::optional min, ::std::optional max, const Tensor & dep_token); // {"schema": "aten::_functional_sym_constrain_range_for_size(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _make_dep_token(::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::_make_dep_token(*, ScalarType? dtype=None, Layout? layout=None, Device? 
device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor refine_names(const Tensor & self, DimnameList names); // {"schema": "aten::refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"} +bool _use_cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank); // {"schema": "aten::_use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool", "dispatch": "True", "default": "False"} +bool _use_cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank); // {"schema": "aten::_use_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank) -> bool", "dispatch": "True", "default": "False"} +::std::tuple _cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool deterministic, bool zero_infinity); // {"schema": "aten::_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool deterministic, bool zero_infinity); // {"schema": "aten::_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +bool _use_cudnn_rnn_flatten_weight(); // {"schema": "aten::_use_cudnn_rnn_flatten_weight() -> bool", "dispatch": "False", "default": "True"} +Tensor _cudnn_rnn_flatten_weight(TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional); // {"schema": "aten::_cudnn_rnn_flatten_weight(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _cudnn_rnn(const Tensor & input, TensorList weight, int64_t weight_stride0, const ::std::optional & weight_buf, const Tensor & hx, const ::std::optional & cx, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const ::std::optional & dropout_state); // {"schema": "aten::_cudnn_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? 
dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple> _cudnn_rnn_backward(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const ::std::optional & cx, const Tensor & output, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const ::std::optional & dropout_state, const Tensor & reserve, ::std::array output_mask); // {"schema": "aten::_cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])", "dispatch": "True", "default": "False"} +Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "False"} +int64_t _debug_has_internal_overlap(const Tensor & self); // {"schema": "aten::_debug_has_internal_overlap(Tensor self) -> int", "dispatch": "False", "default": "True"} +::std::tuple _fused_dropout(const Tensor & self, double p, ::std::optional generator); // {"schema": "aten::_fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _masked_scale(const Tensor & self, const Tensor & mask, double scale); // {"schema": "aten::_masked_scale(Tensor self, Tensor mask, float scale) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple native_dropout(const Tensor & input, double p, ::std::optional train); // {"schema": "aten::native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor native_dropout_backward(const Tensor & grad_output, const Tensor & mask, double scale); // {"schema": "aten::native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _sobol_engine_draw(const Tensor & quasi, int64_t n, const Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional dtype); // {"schema": "aten::_sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor & _sobol_engine_ff_(Tensor & self, int64_t n, const Tensor & sobolstate, int64_t dimension, int64_t num_generated); // {"schema": "aten::_sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & _sobol_engine_scramble_(Tensor & self, const Tensor & ltm, int64_t dimension); // {"schema": "aten::_sobol_engine_scramble_(Tensor(a!) 
self, Tensor ltm, int dimension) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & _sobol_engine_initialize_state_(Tensor & self, int64_t dimension); // {"schema": "aten::_sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor _reshape_from_tensor(const Tensor & self, const Tensor & shape); // {"schema": "aten::_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _shape_as_tensor(const Tensor & self); // {"schema": "aten::_shape_as_tensor(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor dropout(const Tensor & input, double p, bool train); // {"schema": "aten::dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & dropout_(Tensor & self, double p, bool train); // {"schema": "aten::dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor feature_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::feature_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & feature_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor alpha_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::alpha_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & alpha_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor feature_alpha_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & feature_alpha_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor abs(const Tensor & self); // {"schema": "aten::abs(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & abs_(Tensor & self); // {"schema": "aten::abs_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & abs_out(const Tensor & self, Tensor & out); // {"schema": "aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor absolute(const Tensor & self); // {"schema": "aten::absolute(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & absolute_(Tensor & self); // {"schema": "aten::absolute_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & absolute_out(const Tensor & self, Tensor & out); // {"schema": "aten::absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor angle(const Tensor & self); // {"schema": "aten::angle(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & angle_out(const Tensor & self, Tensor & out); // {"schema": "aten::angle.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor view_as_real(const Tensor & self); // {"schema": "aten::view_as_real(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor view_as_complex(const Tensor & self); // {"schema": "aten::view_as_complex(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor sgn(const Tensor & self); // {"schema": "aten::sgn(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sgn_(Tensor & self); // {"schema": "aten::sgn_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sgn_out(const Tensor & self, Tensor & out); // {"schema": "aten::sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor chalf(const Tensor & self, ::std::optional memory_format); // {"schema": "aten::chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor real(const Tensor & self); // {"schema": "aten::real(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor imag(const Tensor & self); // {"schema": "aten::imag(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _conj(const Tensor & self); // {"schema": "aten::_conj(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor conj(const Tensor & self); // {"schema": "aten::conj(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _conj_physical(const Tensor & self); // {"schema": "aten::_conj_physical(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor conj_physical(const Tensor & self); // {"schema": "aten::conj_physical(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & conj_physical_out(const Tensor & self, Tensor & out); // {"schema": "aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & conj_physical_(Tensor & self); // {"schema": "aten::conj_physical_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resolve_conj(const Tensor & self); // {"schema": "aten::resolve_conj(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor resolve_neg(const Tensor & self); // {"schema": "aten::resolve_neg(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _neg_view(const Tensor & self); // {"schema": "aten::_neg_view(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor acos(const Tensor & self); // {"schema": "aten::acos(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & acos_(Tensor & self); // {"schema": "aten::acos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & acos_out(const Tensor & self, Tensor & out); // {"schema": "aten::acos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arccos(const Tensor & self); // {"schema": "aten::arccos(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arccos_(Tensor & self); // {"schema": "aten::arccos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arccos_out(const Tensor & self, Tensor & out); // {"schema": "aten::arccos.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor avg_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad); // {"schema": "aten::avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor", "dispatch": "False", "default": "True"} +Tensor adaptive_avg_pool1d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple adaptive_max_pool1d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor add(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & add_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & add_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _add_relu(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _add_relu_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_add_relu_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _add_relu_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _add_relu(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::_add_relu.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _add_relu_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::_add_relu_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor add(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & add_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor addmv(const Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addmv_(Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmv_(Tensor(a!) 
self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & addmv_out(const Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addr(const Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addr_(Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & addr_out(const Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor affine_grid_generator(const Tensor & theta, c10::SymIntArrayRef size, bool align_corners); // {"schema": "aten::affine_grid_generator(Tensor theta, SymInt[] size, bool align_corners) -> Tensor", "dispatch": "True", "default": "True"} +Tensor affine_grid_generator_backward(const Tensor & grad, c10::SymIntArrayRef size, bool align_corners); // {"schema": "aten::affine_grid_generator_backward(Tensor grad, SymInt[] size, bool align_corners) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _is_all_true(const Tensor & self); // {"schema": "aten::_is_all_true(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _is_any_true(const Tensor & self); // {"schema": "aten::_is_any_true(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _test_check_tensor(const Tensor & self); // {"schema": "aten::_test_check_tensor(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_functorch_fallback(const Tensor & self, const Tensor & other); // {"schema": "aten::_test_functorch_fallback(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor all(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor all(const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & all_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out); // {"schema": "aten::all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & all_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor all(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & all_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & out); // {"schema": "aten::all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +bool allclose(const Tensor & self, const Tensor & other, double rtol, double atol, bool equal_nan); // {"schema": "aten::allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool", "dispatch": "True", "default": "True"} +Tensor any(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor any(const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & any_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out); // {"schema": "aten::any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & any_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor any(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & any_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & out); // {"schema": "aten::any.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor arange(const Scalar & end, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor arange(const Scalar & start, const Scalar & end, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor arange(const Scalar & start, const Scalar & end, const Scalar & step, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & arange_out(const Scalar & end, Tensor & out); // {"schema": "aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & arange_out(const Scalar & start, const Scalar & end, const Scalar & step, Tensor & out); // {"schema": "aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _dim_arange(const Tensor & like, int64_t dim); // {"schema": "aten::_dim_arange(Tensor like, int dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor argmax(const Tensor & self, ::std::optional dim, bool keepdim); // {"schema": "aten::argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & argmax_out(const Tensor & self, ::std::optional dim, bool keepdim, Tensor & out); // {"schema": "aten::argmax.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor argmin(const Tensor & self, ::std::optional dim, bool keepdim); // {"schema": "aten::argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & argmin_out(const Tensor & self, ::std::optional dim, bool keepdim, Tensor & out); // {"schema": "aten::argmin.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor acosh(const Tensor & self); // {"schema": "aten::acosh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & acosh_(Tensor & self); // {"schema": "aten::acosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & acosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::acosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arccosh(const Tensor & self); // {"schema": "aten::arccosh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arccosh_(Tensor & self); // {"schema": "aten::arccosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arccosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arccosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor asinh(const Tensor & self); // {"schema": "aten::asinh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & asinh_(Tensor & self); // {"schema": "aten::asinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & asinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arcsinh(const Tensor & self); // {"schema": "aten::arcsinh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arcsinh_(Tensor & self); // {"schema": "aten::arcsinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arcsinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor atanh(const Tensor & self); // {"schema": "aten::atanh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & atanh_(Tensor & self); // {"schema": "aten::atanh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & atanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arctanh(const Tensor & self); // {"schema": "aten::arctanh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arctanh_(Tensor & self); // {"schema": "aten::arctanh_(Tensor(a!) 
self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arctanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor as_strided(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); // {"schema": "aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)", "dispatch": "True", "default": "False"} +const Tensor & as_strided_(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); // {"schema": "aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor asin(const Tensor & self); // {"schema": "aten::asin(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & asin_(Tensor & self); // {"schema": "aten::asin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & asin_out(const Tensor & self, Tensor & out); // {"schema": "aten::asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arcsin(const Tensor & self); // {"schema": "aten::arcsin(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arcsin_(Tensor & self); // {"schema": "aten::arcsin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arcsin_out(const Tensor & self, Tensor & out); // {"schema": "aten::arcsin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor atan(const Tensor & self); // {"schema": "aten::atan(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & atan_(Tensor & self); // {"schema": "aten::atan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & atan_out(const Tensor & self, Tensor & out); // {"schema": "aten::atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor arctan(const Tensor & self); // {"schema": "aten::arctan(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arctan_(Tensor & self); // {"schema": "aten::arctan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arctan_out(const Tensor & self, Tensor & out); // {"schema": "aten::arctan.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor atleast_1d(const Tensor & self); // {"schema": "aten::atleast_1d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector atleast_1d(TensorList tensors); // {"schema": "aten::atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor atleast_2d(const Tensor & self); // {"schema": "aten::atleast_2d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector atleast_2d(TensorList tensors); // {"schema": "aten::atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor atleast_3d(const Tensor & self); // {"schema": "aten::atleast_3d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector atleast_3d(TensorList tensors); // {"schema": "aten::atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor baddbmm(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & baddbmm_(Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & baddbmm_out(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bartlett_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bartlett_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::bartlett_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor batch_norm(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantized_batch_norm(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & mean, const Tensor & var, double eps, double output_scale, int64_t output_zero_point); // {"schema": "aten::quantized_batch_norm(Tensor input, Tensor? weight, Tensor? 
bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _batch_norm_impl_index(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::_batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)", "dispatch": "False", "default": "True"} +::std::tuple _batch_norm_impl_index_backward(int64_t impl_index, const Tensor & input, const Tensor & grad_output, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var_transform, bool train, double eps, ::std::array output_mask, const Tensor & reservedSpace); // {"schema": "aten::_batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor bernoulli(const Tensor & self, ::std::optional generator); // {"schema": "aten::bernoulli(Tensor self, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, ::std::optional generator, Tensor & out); // {"schema": "aten::bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bernoulli_(Tensor & self, const Tensor & p, ::std::optional generator); // {"schema": "aten::bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bernoulli_(Tensor & self, double p, ::std::optional generator); // {"schema": "aten::bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bernoulli(const Tensor & self, double p, ::std::optional generator); // {"schema": "aten::bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bilinear(const Tensor & input1, const Tensor & input2, const Tensor & weight, const ::std::optional & bias); // {"schema": "aten::bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor binary_cross_entropy(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & binary_cross_entropy_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, Tensor & out); // {"schema": "aten::binary_cross_entropy.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor binary_cross_entropy_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & binary_cross_entropy_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, Tensor & grad_input); // {"schema": "aten::binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor binary_cross_entropy_with_logits(const Tensor & self, const Tensor & target, const ::std::optional & weight, const ::std::optional & pos_weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bincount(const Tensor & self, const ::std::optional & weights, int64_t minlength); // {"schema": "aten::bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor bitwise_not(const Tensor & self); // {"schema": "aten::bitwise_not(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_not_(Tensor & self); // {"schema": "aten::bitwise_not_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_not_out(const Tensor & self, Tensor & out); // {"schema": "aten::bitwise_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & copysign_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor copysign(const Tensor & self, const Tensor & other); // {"schema": "aten::copysign.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copysign_(Tensor & self, const Tensor & other); // {"schema": "aten::copysign_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor copysign(const Tensor & self, const Scalar & other); // {"schema": "aten::copysign.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copysign_(Tensor & self, const Scalar & other); // {"schema": "aten::copysign_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & copysign_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::copysign.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _lazy_clone(const Tensor & self); // {"schema": "aten::_lazy_clone(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor logical_not(const Tensor & self); // {"schema": "aten::logical_not(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_not_(Tensor & self); // {"schema": "aten::logical_not_(Tensor(a!) 
self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_not_out(const Tensor & self, Tensor & out); // {"schema": "aten::logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_xor(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_xor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_xor_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_xor_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_and(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_and(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_and_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_and_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_or(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_or(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_or_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_or_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor blackman_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor blackman_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bmm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::bmm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bmm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector broadcast_tensors(TensorList tensors); // {"schema": "aten::broadcast_tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor broadcast_to(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _sparse_broadcast_to(const Tensor & self, IntArrayRef size); // {"schema": "aten::_sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor cat(const ITensorListRef & tensors, int64_t dim); // {"schema": "aten::cat(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cat_out(const ITensorListRef & tensors, int64_t dim, Tensor & out); // {"schema": "aten::cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cat(TensorList tensors, Dimname dim); // {"schema": "aten::cat.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cat_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::cat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concat(TensorList tensors, int64_t dim); // {"schema": "aten::concat(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concat_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::concat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concat(TensorList tensors, Dimname dim); // {"schema": "aten::concat.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concat_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::concat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concatenate(TensorList tensors, int64_t dim); // {"schema": "aten::concatenate(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concatenate_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concatenate(TensorList tensors, Dimname dim); // {"schema": "aten::concatenate.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concatenate_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor block_diag(TensorList tensors); // {"schema": "aten::block_diag(Tensor[] tensors) -> Tensor", "dispatch": "True", "default": "True"} +Tensor ceil(const Tensor & self); // {"schema": "aten::ceil(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ceil_(Tensor & self); // {"schema": "aten::ceil_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ceil_out(const Tensor & self, Tensor & out); // {"schema": "aten::ceil.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor chain_matmul(TensorList matrices); // {"schema": "aten::chain_matmul(Tensor[] matrices) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & chain_matmul_out(TensorList matrices, Tensor & out); // {"schema": "aten::chain_matmul.out(Tensor[] matrices, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::vector unsafe_chunk(const Tensor & self, int64_t chunks, int64_t dim); // {"schema": "aten::unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector chunk(const Tensor & self, int64_t chunks, int64_t dim); // {"schema": "aten::chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector tensor_split(const Tensor & self, c10::SymInt sections, int64_t dim); // {"schema": "aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector tensor_split(const Tensor & self, c10::SymIntArrayRef indices, int64_t dim); // {"schema": "aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector tensor_split(const Tensor & self, const Tensor & tensor_indices_or_sections, int64_t dim); // {"schema": "aten::tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +Tensor clamp(const Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor clamp(const Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & clamp_(Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_(Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_out(const Tensor & self, const ::std::optional & min, const ::std::optional & max, Tensor & out); // {"schema": "aten::clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & clamp_out(const Tensor & self, const ::std::optional & min, const ::std::optional & max, Tensor & out); // {"schema": "aten::clamp.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor clamp_max(const Tensor & self, const Scalar & max); // {"schema": "aten::clamp_max(Tensor self, Scalar max) -> Tensor", "dispatch": "True", "default": "True"} +Tensor clamp_max(const Tensor & self, const Tensor & max); // {"schema": "aten::clamp_max.Tensor(Tensor self, Tensor max) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & clamp_max_(Tensor & self, const Scalar & max); // {"schema": "aten::clamp_max_(Tensor(a!) 
self, Scalar max) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_max_(Tensor & self, const Tensor & max); // {"schema": "aten::clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_max_out(const Tensor & self, const Scalar & max, Tensor & out); // {"schema": "aten::clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & clamp_max_out(const Tensor & self, const Tensor & max, Tensor & out); // {"schema": "aten::clamp_max.Tensor_out(Tensor self, Tensor max, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor clamp_min(const Tensor & self, const Scalar & min); // {"schema": "aten::clamp_min(Tensor self, Scalar min) -> Tensor", "dispatch": "True", "default": "True"} +Tensor clamp_min(const Tensor & self, const Tensor & min); // {"schema": "aten::clamp_min.Tensor(Tensor self, Tensor min) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & clamp_min_(Tensor & self, const Scalar & min); // {"schema": "aten::clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_min_(Tensor & self, const Tensor & min); // {"schema": "aten::clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_min_out(const Tensor & self, const Scalar & min, Tensor & out); // {"schema": "aten::clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & clamp_min_out(const Tensor & self, const Tensor & min, Tensor & out); // {"schema": "aten::clamp_min.Tensor_out(Tensor self, Tensor min, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor clip(const Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor clip(const Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clip.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & clip_(Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clip_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & clip_(Tensor & self, const ::std::optional & min, const ::std::optional & max); // {"schema": "aten::clip_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & clip_out(const Tensor & self, const ::std::optional & min, const ::std::optional & max, Tensor & out); // {"schema": "aten::clip.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & clip_out(const Tensor & self, const ::std::optional & min, const ::std::optional & max, Tensor & out); // {"schema": "aten::clip.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +bool cudnn_is_acceptable(const Tensor & self); // {"schema": "aten::cudnn_is_acceptable(Tensor self) -> bool", "dispatch": "False", "default": "True"} +Tensor complex(const Tensor & real, const Tensor & imag); // {"schema": "aten::complex(Tensor real, Tensor imag) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & complex_out(const Tensor & real, const Tensor & imag, Tensor & out); // {"schema": "aten::complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor polar(const Tensor & abs, const Tensor & angle); // {"schema": "aten::polar(Tensor abs, Tensor angle) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & polar_out(const Tensor & abs, const Tensor & angle, Tensor & out); // {"schema": "aten::polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor constant_pad_nd(const Tensor & self, c10::SymIntArrayRef pad, const Scalar & value); // {"schema": "aten::constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor contiguous(const Tensor & self, MemoryFormat memory_format); // {"schema": "aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor convolution(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); // {"schema": "aten::convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward(const Tensor & grad_output, const Tensor & input, const Tensor & weight, OptionalSymIntArrayRef bias_sizes, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor convolution_overrideable(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); // {"schema": "aten::convolution_overrideable(Tensor input, Tensor weight, Tensor? 
bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward_overrideable(const Tensor & grad_output, const Tensor & input, const Tensor & weight, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)", "dispatch": "True", "default": "True"} +Tensor _convolution(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32); // {"schema": "aten::_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _convolution(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, IntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled); // {"schema": "aten::_convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _convolution_mode(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _convolution_double_backward(const ::std::optional & ggI, const ::std::optional & ggW, const ::std::optional & ggb, const Tensor & gO, const Tensor & weight, const Tensor & self, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::_convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor conv1d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv1d(Tensor input, Tensor weight, Tensor? 
bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv2d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv3d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv1d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding=\"valid\", SymInt[1] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv2d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding=\"valid\", SymInt[2] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv3d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding=\"valid\", SymInt[3] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_tbc(const Tensor & self, const Tensor & weight, const Tensor & bias, int64_t pad); // {"schema": "aten::conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple conv_tbc_backward(const Tensor & self, const Tensor & input, const Tensor & weight, const Tensor & bias, int64_t pad); // {"schema": "aten::conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor conv_transpose1d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_transpose2d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose2d.input(Tensor input, Tensor weight, Tensor? 
bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_transpose3d(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor copy(const Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copy_(Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _copy_from(const Tensor & self, const Tensor & dst, bool non_blocking); // {"schema": "aten::_copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _copy_from_and_resize(const Tensor & self, const Tensor & dst); // {"schema": "aten::_copy_from_and_resize(Tensor self, Tensor dst) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cos(const Tensor & self); // {"schema": "aten::cos(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cos_(Tensor & self); // {"schema": "aten::cos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cos_out(const Tensor & self, Tensor & out); // {"schema": "aten::cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cosh(const Tensor & self); // {"schema": "aten::cosh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cosh_(Tensor & self); // {"schema": "aten::cosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cosine_embedding_loss(const Tensor & input1, const Tensor & input2, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor count_nonzero(const Tensor & self, IntArrayRef dim); // {"schema": "aten::count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor count_nonzero(const Tensor & self, ::std::optional dim); // {"schema": "aten::count_nonzero(Tensor self, int? dim=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor cov(const Tensor & self, int64_t correction, const ::std::optional & fweights, const ::std::optional & aweights); // {"schema": "aten::cov(Tensor self, *, int correction=1, Tensor? fweights=None, Tensor? 
aweights=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor corrcoef(const Tensor & self); // {"schema": "aten::corrcoef(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cudnn_affine_grid_generator(const Tensor & theta, int64_t N, int64_t C, int64_t H, int64_t W); // {"schema": "aten::cudnn_affine_grid_generator(Tensor theta, int N, int C, int H, int W) -> Tensor grid", "dispatch": "True", "default": "False"} +Tensor cudnn_affine_grid_generator_backward(const Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W); // {"schema": "aten::cudnn_affine_grid_generator_backward(Tensor grad, int N, int C, int H, int W) -> Tensor grad_theta", "dispatch": "True", "default": "False"} +::std::tuple cudnn_batch_norm(const Tensor & input, const Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); // {"schema": "aten::cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple cudnn_batch_norm_backward(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, const Tensor & reserveSpace); // {"schema": "aten::cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32); // {"schema": "aten::cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & cudnn_convolution_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, Tensor & out); // {"schema": "aten::cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_transpose(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32); // {"schema": "aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _mps_convolution_transpose(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mps_convolution_transpose_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_relu(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_add_relu(const Tensor & self, const Tensor & weight, const Tensor & z, const ::std::optional & alpha, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cudnn_grid_sampler(const Tensor & self, const Tensor & grid); // {"schema": "aten::cudnn_grid_sampler(Tensor self, Tensor grid) -> Tensor output", "dispatch": "True", "default": "False"} +::std::tuple cudnn_grid_sampler_backward(const Tensor & self, const Tensor & grid, const Tensor & grad_output); // {"schema": "aten::cudnn_grid_sampler_backward(Tensor self, Tensor grid, Tensor grad_output) -> (Tensor grad_self, Tensor grad_grid)", "dispatch": "True", "default": "False"} +::std::tuple cummax(const Tensor & self, int64_t dim); // {"schema": "aten::cummax(Tensor self, int dim) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple cummax_out(const Tensor & self, int64_t dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummax.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) 
indices)", "dispatch": "True", "default": "True"} +::std::tuple cummax(const Tensor & self, Dimname dim); // {"schema": "aten::cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple cummax_out(const Tensor & self, Dimname dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummax.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +void _cummax_helper(const Tensor & self, Tensor & values, Tensor & indices, int64_t dim); // {"schema": "aten::_cummax_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()", "dispatch": "True", "default": "False"} +::std::tuple cummin(const Tensor & self, int64_t dim); // {"schema": "aten::cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple cummin_out(const Tensor & self, int64_t dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"} +::std::tuple cummin(const Tensor & self, Dimname dim); // {"schema": "aten::cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple cummin_out(const Tensor & self, Dimname dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummin.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +void _cummin_helper(const Tensor & self, Tensor & values, Tensor & indices, int64_t dim); // {"schema": "aten::_cummin_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()", "dispatch": "True", "default": "False"} +Tensor cummaxmin_backward(const Tensor & grad, const Tensor & input, const Tensor & indices, int64_t dim); // {"schema": "aten::cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumprod(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cumprod_(Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::cumprod_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cumprod_out(const Tensor & self, int64_t dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cumprod(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cumprod_(Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::cumprod_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & cumprod_out(const Tensor & self, Dimname dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::cumprod.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor cumprod_backward(const Tensor & grad, const Tensor & input, int64_t dim, const Tensor & output); // {"schema": "aten::cumprod_backward(Tensor grad, Tensor input, int dim, Tensor output) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumsum(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cumsum_(Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cumsum_out(const Tensor & self, int64_t dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cumsum(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cumsum_(Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::cumsum_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & cumsum_out(const Tensor & self, Dimname dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::cumsum.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor cumulative_trapezoid(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::cumulative_trapezoid.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumulative_trapezoid(const Tensor & y, const Scalar & dx, int64_t dim); // {"schema": "aten::cumulative_trapezoid.dx(Tensor y, *, Scalar dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, int64_t reduction, bool zero_infinity); // {"schema": "aten::ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, int64_t reduction, bool zero_infinity); // {"schema": "aten::ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, 
int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _ctc_loss_backward(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss_backward(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _ctc_loss_backward(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss_backward.Tensor(Tensor grad, Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor diag_embed(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor diagflat(const Tensor & self, int64_t offset); // {"schema": "aten::diagflat(Tensor self, int offset=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor diagonal(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor linalg_diagonal(const Tensor & A, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor diagonal(const Tensor & self, Dimname outdim, Dimname dim1, Dimname dim2, int64_t offset); // {"schema": "aten::diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor diagonal_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_backward(Tensor grad_output, SymInt[] input_sizes, int offset, int dim1, int dim2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fill_diagonal_(Tensor & self, const Scalar & fill_value, bool wrap); // {"schema": "aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor diff(const Tensor & self, int64_t n, int64_t dim, const ::std::optional & prepend, const ::std::optional & append); // {"schema": "aten::diff(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & diff_out(const Tensor & self, int64_t n, int64_t dim, const ::std::optional & prepend, const ::std::optional & append, Tensor & out); // {"schema": "aten::diff.out(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, const ::std::optional & spacing, ::std::optional dim, int64_t edge_order); // {"schema": "aten::gradient.scalarint(Tensor self, *, Scalar? spacing=None, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, const Scalar & spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.scalararray(Tensor self, *, Scalar spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.array(Tensor self, *, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, ArrayRef spacing, ::std::optional dim, int64_t edge_order); // {"schema": "aten::gradient.scalarrayint(Tensor self, *, Scalar[] spacing, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, ArrayRef spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.scalarrayarray(Tensor self, *, Scalar[] spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, TensorList spacing, ::std::optional dim, int64_t edge_order); // {"schema": "aten::gradient.tensorarrayint(Tensor self, *, Tensor[] spacing, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, TensorList spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.tensorarray(Tensor self, *, Tensor[] spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor div(const Tensor & self, const Tensor & other); // {"schema": "aten::div.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & div_(Tensor & self, const Tensor & other); // {"schema": "aten::div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor div(const Tensor & self, const Tensor & other, ::std::optional rounding_mode); // {"schema": "aten::div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & div_(Tensor & self, const Tensor & other, ::std::optional rounding_mode); // {"schema": "aten::div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Tensor & other, ::std::optional rounding_mode, Tensor & out); // {"schema": "aten::div.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor div(const Tensor & self, const Scalar & other); // {"schema": "aten::div.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & div_(Tensor & self, const Scalar & other); // {"schema": "aten::div_.Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor div(const Tensor & self, const Scalar & other, ::std::optional rounding_mode); // {"schema": "aten::div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & div_(Tensor & self, const Scalar & other, ::std::optional rounding_mode); // {"schema": "aten::div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor divide(const Tensor & self, const Tensor & other); // {"schema": "aten::divide.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Tensor & other); // {"schema": "aten::divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Scalar & other); // {"schema": "aten::divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Scalar & other); // {"schema": "aten::divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Tensor & other, ::std::optional rounding_mode); // {"schema": "aten::divide.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Tensor & other, ::std::optional rounding_mode); // {"schema": "aten::divide_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & divide_out(const Tensor & self, const Tensor & other, ::std::optional rounding_mode, Tensor & out); // {"schema": "aten::divide.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Scalar & other, ::std::optional rounding_mode); // {"schema": "aten::divide.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Scalar & other, ::std::optional rounding_mode); // {"schema": "aten::divide_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor true_divide(const Tensor & self, const Tensor & other); // {"schema": "aten::true_divide.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & true_divide_(Tensor & self, const Tensor & other); // {"schema": "aten::true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & true_divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor true_divide(const Tensor & self, const Scalar & other); // {"schema": "aten::true_divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & true_divide_(Tensor & self, const Scalar & other); // {"schema": "aten::true_divide_.Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor dot(const Tensor & self, const Tensor & tensor); // {"schema": "aten::dot(Tensor self, Tensor tensor) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & dot_out(const Tensor & self, const Tensor & tensor, Tensor & out); // {"schema": "aten::dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor vdot(const Tensor & self, const Tensor & other); // {"schema": "aten::vdot(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & vdot_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::vdot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor einsum(c10::string_view equation, TensorList tensors, OptionalIntArrayRef path); // {"schema": "aten::einsum(str equation, Tensor[] tensors, *, int[]? path=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor embedding(const Tensor & weight, const Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse); // {"schema": "aten::embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor embedding_backward(const Tensor & grad, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse); // {"schema": "aten::embedding_backward(Tensor grad, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor", "dispatch": "False", "default": "True"} +Tensor embedding_dense_backward(const Tensor & grad_output, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq); // {"schema": "aten::embedding_dense_backward(Tensor grad_output, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & embedding_renorm_(Tensor & self, const Tensor & indices, double max_norm, double norm_type); // {"schema": "aten::embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor embedding_sparse_backward(const Tensor & grad, const Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); // {"schema": "aten::embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _embedding_bag_forward_only(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx); // {"schema": "aten::_embedding_bag_forward_only(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? 
per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _rowwise_prune(const Tensor & weight, const Tensor & mask, ScalarType compressed_indices_dtype); // {"schema": "aten::_rowwise_prune(Tensor weight, Tensor mask, ScalarType compressed_indices_dtype) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor row_stack(TensorList tensors); // {"schema": "aten::row_stack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & row_stack_out(TensorList tensors, Tensor & out); // {"schema": "aten::row_stack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset); // {"schema": "aten::embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset, ::std::optional padding_idx); // {"schema": "aten::embedding_bag.padding_idx(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, bool include_last_offset, int? padding_idx) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx); // {"schema": "aten::_embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _embedding_bag_backward(const Tensor & grad, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _embedding_bag_sparse_backward(const Tensor & grad, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, const Tensor & bag_size, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? 
per_sample_weights, int padding_idx=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _embedding_bag_dense_backward(const Tensor & grad, const Tensor & indices, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _embedding_bag_per_sample_weights_backward(const Tensor & grad, const Tensor & weight, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, int64_t mode, int64_t padding_idx); // {"schema": "aten::_embedding_bag_per_sample_weights_backward(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor empty(IntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor empty(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::empty.memory_format(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor empty_permuted(c10::SymIntArrayRef size, IntArrayRef physical_layout, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor new_empty(const Tensor & self, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor new_empty_strided(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor new_full(const Tensor & self, c10::SymIntArrayRef size, const Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor new_zeros(const Tensor & self, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor new_ones(const Tensor & self, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _empty_affine_quantized(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, double scale, int64_t zero_point, ::std::optional memory_format); // {"schema": "aten::_empty_affine_quantized(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _empty_per_channel_affine_quantized(c10::SymIntArrayRef size, const Tensor & scales, const Tensor & zero_points, int64_t axis, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor", "dispatch": "True", "default": "False"} +const Tensor & resize_(const Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); // {"schema": "aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +const Tensor & _resize_output_(const Tensor & self, c10::SymIntArrayRef size, Device device); // {"schema": "aten::_resize_output_(Tensor(a!) self, SymInt[] size, Device device) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor empty_quantized(IntArrayRef size, const Tensor & qtensor, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & empty_out(c10::SymIntArrayRef size, ::std::optional memory_format, Tensor & out); // {"schema": "aten::empty.out(SymInt[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor empty_like(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? 
memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor erf(const Tensor & self); // {"schema": "aten::erf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & erf_(Tensor & self); // {"schema": "aten::erf_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & erf_out(const Tensor & self, Tensor & out); // {"schema": "aten::erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor erfc(const Tensor & self); // {"schema": "aten::erfc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & erfc_(Tensor & self); // {"schema": "aten::erfc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & erfc_out(const Tensor & self, Tensor & out); // {"schema": "aten::erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor exp(const Tensor & self); // {"schema": "aten::exp(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & exp_(Tensor & self); // {"schema": "aten::exp_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & exp_out(const Tensor & self, Tensor & out); // {"schema": "aten::exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor exp2(const Tensor & self); // {"schema": "aten::exp2(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & exp2_(Tensor & self); // {"schema": "aten::exp2_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & exp2_out(const Tensor & self, Tensor & out); // {"schema": "aten::exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor expm1(const Tensor & self); // {"schema": "aten::expm1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & expm1_(Tensor & self); // {"schema": "aten::expm1_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & expm1_out(const Tensor & self, Tensor & out); // {"schema": "aten::expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor expand(const Tensor & self, c10::SymIntArrayRef size, bool implicit); // {"schema": "aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor expand_as(const Tensor & self, const Tensor & other); // {"schema": "aten::expand_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor eye(c10::SymInt n, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::eye(SymInt n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor eye(c10::SymInt n, c10::SymInt m, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::eye.m(SymInt n, SymInt m, *, ScalarType? dtype=None, Layout? layout=None, Device? 
device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & eye_out(c10::SymInt n, Tensor & out); // {"schema": "aten::eye.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & eye_out(c10::SymInt n, c10::SymInt m, Tensor & out); // {"schema": "aten::eye.m_out(SymInt n, SymInt m, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor flatten(const Tensor & self, int64_t start_dim, int64_t end_dim); // {"schema": "aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor flatten(const Tensor & self, int64_t start_dim, int64_t end_dim, Dimname out_dim); // {"schema": "aten::flatten.named_out_dim(Tensor(a) self, int start_dim, int end_dim, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor flatten(const Tensor & self, Dimname start_dim, Dimname end_dim, Dimname out_dim); // {"schema": "aten::flatten.using_names(Tensor(a) self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor flatten(const Tensor & self, DimnameList dims, Dimname out_dim); // {"schema": "aten::flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor unflatten(const Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); // {"schema": "aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor unflatten(const Tensor & self, Dimname dim, c10::SymIntArrayRef sizes, DimnameList names); // {"schema": "aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor fill(const Tensor & self, const Scalar & value); // {"schema": "aten::fill.Scalar(Tensor self, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor fill(const Tensor & self, const Tensor & value); // {"schema": "aten::fill.Tensor(Tensor self, Tensor value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fill_(Tensor & self, const Scalar & value); // {"schema": "aten::fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & fill_(Tensor & self, const Tensor & value); // {"schema": "aten::fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor floor(const Tensor & self); // {"schema": "aten::floor(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & floor_(Tensor & self); // {"schema": "aten::floor_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & floor_out(const Tensor & self, Tensor & out); // {"schema": "aten::floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor floor_divide(const Tensor & self, const Tensor & other); // {"schema": "aten::floor_divide(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & floor_divide_(Tensor & self, const Tensor & other); // {"schema": "aten::floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & floor_divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor floor_divide(const Tensor & self, const Scalar & other); // {"schema": "aten::floor_divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & floor_divide_(Tensor & self, const Scalar & other); // {"schema": "aten::floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor frac(const Tensor & self); // {"schema": "aten::frac(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & frac_(Tensor & self); // {"schema": "aten::frac_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & frac_out(const Tensor & self, Tensor & out); // {"schema": "aten::frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor full(IntArrayRef size, const Scalar & fill_value, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor full(c10::SymIntArrayRef size, const Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::full(SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & full_out(c10::SymIntArrayRef size, const Scalar & fill_value, Tensor & out); // {"schema": "aten::full.out(SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor full_like(const Tensor & self, const Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor from_file(c10::string_view filename, ::std::optional shared, ::std::optional size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & gcd_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::gcd.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gcd(const Tensor & self, const Tensor & other); // {"schema": "aten::gcd(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gcd_(Tensor & self, const Tensor & other); // {"schema": "aten::gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lcm_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::lcm.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lcm(const Tensor & self, const Tensor & other); // {"schema": "aten::lcm(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lcm_(Tensor & self, const Tensor & other); // {"schema": "aten::lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor grid_sampler(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "False", "default": "True"} +Tensor grid_sampler_2d(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple grid_sampler_2d_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); // {"schema": "aten::grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _grid_sampler_2d_cpu_fallback(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::_grid_sampler_2d_cpu_fallback(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple _grid_sampler_2d_cpu_fallback_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::_grid_sampler_2d_cpu_fallback_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor grid_sampler_3d(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple grid_sampler_3d_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); // {"schema": "aten::grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor hann_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hann_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hamming_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hamming_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hamming_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hamming_window(int64_t window_length, bool periodic, double alpha, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hamming_window.periodic_alpha(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hamming_window(int64_t window_length, bool periodic, double alpha, double beta, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::hamming_window.periodic_alpha_beta(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor kaiser_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor kaiser_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor kaiser_window(int64_t window_length, bool periodic, double beta, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor hinge_embedding_loss(const Tensor & self, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor group_norm(const Tensor & input, int64_t num_groups, const ::std::optional & weight, const ::std::optional & bias, double eps, bool cudnn_enabled); // {"schema": "aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple native_group_norm(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); // {"schema": "aten::native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +::std::tuple native_group_norm_backward(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & rstd, const ::std::optional & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array output_mask); // {"schema": "aten::native_group_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, SymInt N, SymInt C, SymInt HxW, int group, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _fft_r2c(const Tensor & self, IntArrayRef dim, int64_t normalization, bool onesided); // {"schema": "aten::_fft_r2c(Tensor self, int[] dim, int normalization, bool onesided) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _fft_r2c_out(const Tensor & self, IntArrayRef dim, int64_t normalization, bool onesided, Tensor & out); // {"schema": "aten::_fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _fft_c2r(const Tensor & self, IntArrayRef dim, int64_t normalization, c10::SymInt last_dim_size); // {"schema": "aten::_fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _fft_c2r_out(const Tensor & self, IntArrayRef dim, int64_t normalization, c10::SymInt last_dim_size, Tensor & out); // {"schema": "aten::_fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _fft_c2c(const Tensor & self, c10::SymIntArrayRef dim, int64_t normalization, bool forward); // {"schema": "aten::_fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _fft_c2c_out(const Tensor & self, c10::SymIntArrayRef dim, int64_t normalization, bool forward, Tensor & out); // {"schema": "aten::_fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +void _validate_compressed_sparse_indices(bool is_crow, const Tensor & compressed_idx, const Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); // {"schema": "aten::_validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()", "dispatch": "True", "default": "False"} +int64_t _cufft_get_plan_cache_size(DeviceIndex device_index); // {"schema": "aten::_cufft_get_plan_cache_size(DeviceIndex device_index) -> int", "dispatch": "False", "default": "True"} +int64_t _cufft_get_plan_cache_max_size(DeviceIndex device_index); // {"schema": "aten::_cufft_get_plan_cache_max_size(DeviceIndex device_index) -> int", "dispatch": "False", "default": "True"} +void _cufft_set_plan_cache_max_size(DeviceIndex device_index, int64_t max_size); // {"schema": "aten::_cufft_set_plan_cache_max_size(DeviceIndex device_index, int max_size) -> ()", "dispatch": "False", "default": "True"} +void _cufft_clear_plan_cache(DeviceIndex device_index); // {"schema": "aten::_cufft_clear_plan_cache(DeviceIndex device_index) -> ()", "dispatch": "False", "default": "True"} +Tensor index(const Tensor & self, const c10::List<::std::optional> & indices); // {"schema": "aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_out(const Tensor & self, const c10::List<::std::optional> & indices, Tensor & out); // {"schema": "aten::index.Tensor_out(Tensor self, Tensor?[] indices, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _unsafe_index(const Tensor & self, const c10::List<::std::optional> & indices); // {"schema": "aten::_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _unsafe_masked_index(const Tensor & self, const Tensor & mask, const c10::List<::std::optional> & indices, const Scalar & fill); // {"schema": "aten::_unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _unsafe_masked_index_put_accumulate(const Tensor & self, const Tensor & mask, const c10::List<::std::optional> & indices, const Tensor & values); // {"schema": "aten::_unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_copy_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, Tensor & out); // {"schema": "aten::index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & index_copy_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_copy(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_copy_(Tensor & self, Dimname dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy_.dimname(Tensor(a!) 
self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor index_copy(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & index_put_(Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_put(const Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _unsafe_index_put(const Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _index_put_impl_(Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate, bool unsafe); // {"schema": "aten::_index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor instance_norm(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"} +Tensor isclose(const Tensor & self, const Tensor & other, double rtol, double atol, bool equal_nan); // {"schema": "aten::isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & isin_out(const Tensor & elements, const Tensor & test_elements, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Tensor_Tensor_out(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor isin(const Tensor & elements, const Tensor & test_elements, bool assume_unique, bool invert); // {"schema": "aten::isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isin_out(const Tensor & elements, const Scalar & test_element, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Tensor_Scalar_out(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor isin(const Tensor & elements, const Scalar & test_element, bool assume_unique, bool invert); // {"schema": "aten::isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isin_out(const Scalar & element, const Tensor & test_elements, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Scalar_Tensor_out(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor isin(const Scalar & element, const Tensor & test_elements, bool assume_unique, bool invert); // {"schema": "aten::isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor isnan(const Tensor & self); // {"schema": "aten::isnan(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +bool is_distributed(const Tensor & self); // {"schema": "aten::is_distributed(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_floating_point(const Tensor & self); // {"schema": "aten::is_floating_point(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_complex(const Tensor & self); // {"schema": "aten::is_complex(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_conj(const Tensor & self); // {"schema": "aten::is_conj(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool _is_zerotensor(const Tensor & self); // {"schema": "aten::_is_zerotensor(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_neg(const Tensor & self); // {"schema": "aten::is_neg(Tensor self) -> bool", "dispatch": "False", "default": "True"} +Tensor isreal(const Tensor & self); // {"schema": "aten::isreal(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +bool is_nonzero(const Tensor & self); // {"schema": "aten::is_nonzero(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_same_size(const Tensor & self, const Tensor & other); // {"schema": "aten::is_same_size(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "True"} +bool is_signed(const Tensor & self); // {"schema": "aten::is_signed(Tensor self) -> bool", "dispatch": "False", "default": "True"} +bool is_inference(const Tensor & self); // {"schema": "aten::is_inference(Tensor self) -> bool", "dispatch": "False", "default": "True"} +Tensor kl_div(const Tensor & self, const Tensor & target, int64_t reduction, bool log_target); // {"schema": "aten::kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor kron(const Tensor & self, const Tensor & other); // {"schema": "aten::kron(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & kron_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::kron.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple kthvalue(const Tensor & self, int64_t k, int64_t dim, bool keepdim); // {"schema": "aten::kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple kthvalue_out(const Tensor & self, int64_t k, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple kthvalue(const Tensor & self, int64_t k, Dimname dim, bool keepdim); // {"schema": "aten::kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple kthvalue_out(const Tensor & self, int64_t k, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor layer_norm(const Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps, bool cudnn_enable); // {"schema": "aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple native_layer_norm(const Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); // {"schema": "aten::native_layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +::std::tuple native_layer_norm_backward(const Tensor & grad_out, const Tensor & input, c10::SymIntArrayRef normalized_shape, const Tensor & mean, const Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask); // {"schema": "aten::native_layer_norm_backward(Tensor grad_out, Tensor input, SymInt[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor rms_norm(const Tensor & input, IntArrayRef normalized_shape, const ::std::optional & weight, ::std::optional eps); // {"schema": "aten::rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor nan_to_num(const Tensor & self, ::std::optional nan, ::std::optional posinf, ::std::optional neginf); // {"schema": "aten::nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & nan_to_num_(Tensor & self, ::std::optional nan, ::std::optional posinf, ::std::optional neginf); // {"schema": "aten::nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & nan_to_num_out(const Tensor & self, ::std::optional nan, ::std::optional posinf, ::std::optional neginf, Tensor & out); // {"schema": "aten::nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor linear(const Tensor & input, const Tensor & weight, const ::std::optional & bias); // {"schema": "aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple linear_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask); // {"schema": "aten::linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & linear_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, Tensor & out); // {"schema": "aten::linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor mkldnn_linear(const Tensor & self, const Tensor & weight, const ::std::optional & bias); // {"schema": "aten::mkldnn_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_linear_backward_input(IntArrayRef input_size, const Tensor & grad_output, const Tensor & weight); // {"schema": "aten::mkldnn_linear_backward_input(int[] input_size, Tensor grad_output, Tensor weight) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mkldnn_linear_backward_weights(const Tensor & grad_output, const Tensor & input, const Tensor & weight, bool bias_defined); // {"schema": "aten::mkldnn_linear_backward_weights(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple mkldnn_linear_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask); // {"schema": "aten::mkldnn_linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _cslt_compress(const Tensor & input); // {"schema": "aten::_cslt_compress(Tensor input) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _cslt_sparse_mm(const Tensor & compressed_A, const Tensor & dense_B, const ::std::optional & bias, const ::std::optional & alpha, ::std::optional out_dtype, bool transpose_result, int64_t alg_id); // {"schema": "aten::_cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor", "dispatch": "True", "default": "False"} +int64_t _cslt_sparse_mm_search(const Tensor & compressed_A, const Tensor & dense_B, const ::std::optional & bias, const ::std::optional & alpha, ::std::optional out_dtype, bool transpose_result); // {"schema": "aten::_cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? 
out_dtype=None, bool transpose_result=False) -> int", "dispatch": "True", "default": "False"} +::std::tuple _sparse_semi_structured_tile(const Tensor & input, c10::string_view algorithm, bool use_cutlass); // {"schema": "aten::_sparse_semi_structured_tile(Tensor input, str algorithm=\"\", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _sparse_semi_structured_apply(const Tensor & input, const Tensor & thread_masks); // {"schema": "aten::_sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _sparse_semi_structured_apply_dense(const Tensor & input, const Tensor & thread_masks); // {"schema": "aten::_sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_semi_structured_linear(const Tensor & input, const Tensor & weight, const Tensor & meta, const ::std::optional & bias, ::std::optional activation, ::std::optional out_dtype); // {"schema": "aten::_sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_semi_structured_mm(const Tensor & mat1, const Tensor & mat1_meta, const Tensor & mat2, ::std::optional out_dtype); // {"schema": "aten::_sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_semi_structured_addmm(const Tensor & input, const Tensor & mat1, const Tensor & mat1_meta, const Tensor & mat2, const Scalar & alpha, const Scalar & beta, ::std::optional out_dtype); // {"schema": "aten::_sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _mixed_dtypes_linear(const Tensor & input, const Tensor & weight, const Tensor & scale, const ::std::optional & bias, ::std::optional activation); // {"schema": "aten::_mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? 
activation=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor fbgemm_linear_int8_weight_fp32_activation(const Tensor & input, const Tensor & weight, const Tensor & packed, const Tensor & col_offsets, const Scalar & weight_scale, const Scalar & weight_zero_point, const Tensor & bias); // {"schema": "aten::fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fbgemm_linear_int8_weight(const Tensor & input, const Tensor & weight, const Tensor & packed, const Tensor & col_offsets, const Scalar & weight_scale, const Scalar & weight_zero_point, const Tensor & bias); // {"schema": "aten::fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple fbgemm_linear_quantize_weight(const Tensor & input); // {"schema": "aten::fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)", "dispatch": "False", "default": "True"} +Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor & input); // {"schema": "aten::fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _wrapped_linear_prepack(const Tensor & weight, const Tensor & weight_scale, const Tensor & weight_zero_point, const Tensor & bias); // {"schema": "aten::_wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _wrapped_quantized_linear_prepacked(const Tensor & input, const Tensor & input_scale, const Tensor & input_zero_point, const Tensor & packed_weight, const Tensor & output_scale, const Tensor & output_zero_point, int64_t out_channel); // {"schema": "aten::_wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fbgemm_linear_fp16_weight_fp32_activation(const Tensor & input, const Tensor & packed_weight, const Tensor & bias); // {"schema": "aten::fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fbgemm_linear_fp16_weight(const Tensor & input, const Tensor & packed_weight, const Tensor & bias); // {"schema": "aten::fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fbgemm_pack_quantized_matrix(const Tensor & input); // {"schema": "aten::fbgemm_pack_quantized_matrix(Tensor input) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fbgemm_pack_quantized_matrix(const Tensor & input, int64_t K, int64_t N); // {"schema": "aten::fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor", "dispatch": "False", "default": "True"} +Tensor ldexp(const Tensor & self, const Tensor & other); // {"schema": "aten::ldexp.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & ldexp_(Tensor & self, const Tensor & other); // {"schema": "aten::ldexp_(Tensor(a!) 
self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & ldexp_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linspace(const Scalar & start, const Scalar & end, int64_t steps, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor linspace(const Tensor & start, const Tensor & end, int64_t steps, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor linspace(const Tensor & start, const Scalar & end, int64_t steps, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor linspace(const Scalar & start, const Tensor & end, int64_t steps, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linspace_out(const Scalar & start, const Scalar & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & linspace_out(const Tensor & start, const Tensor & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & linspace_out(const Tensor & start, const Scalar & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & linspace_out(const Scalar & start, const Tensor & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor log(const Tensor & self); // {"schema": "aten::log(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & log_(Tensor & self); // {"schema": "aten::log_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log_out(const Tensor & self, Tensor & out); // {"schema": "aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor log10(const Tensor & self); // {"schema": "aten::log10(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & log10_(Tensor & self); // {"schema": "aten::log10_(Tensor(a!) 
self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log10_out(const Tensor & self, Tensor & out); // {"schema": "aten::log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor log1p(const Tensor & self); // {"schema": "aten::log1p(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & log1p_(Tensor & self); // {"schema": "aten::log1p_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log1p_out(const Tensor & self, Tensor & out); // {"schema": "aten::log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor log2(const Tensor & self); // {"schema": "aten::log2(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & log2_(Tensor & self); // {"schema": "aten::log2_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log2_out(const Tensor & self, Tensor & out); // {"schema": "aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & logaddexp_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logaddexp(const Tensor & self, const Tensor & other); // {"schema": "aten::logaddexp(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logaddexp2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logaddexp2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logaddexp2(const Tensor & self, const Tensor & other); // {"schema": "aten::logaddexp2(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor xlogy(const Tensor & self, const Tensor & other); // {"schema": "aten::xlogy.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor xlogy(const Scalar & self, const Tensor & other); // {"schema": "aten::xlogy.Scalar_Self(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor xlogy(const Tensor & self, const Scalar & other); // {"schema": "aten::xlogy.Scalar_Other(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & xlogy_(Tensor & self, const Tensor & other); // {"schema": "aten::xlogy_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & xlogy_(Tensor & self, const Scalar & other); // {"schema": "aten::xlogy_.Scalar_Other(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & xlogy_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::xlogy.OutTensor(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & xlogy_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::xlogy.OutScalar_Self(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & xlogy_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::xlogy.OutScalar_Other(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor logspace(const Scalar & start, const Scalar & end, int64_t steps, double base, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor logspace(const Tensor & start, const Tensor & end, int64_t steps, double base, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor logspace(const Tensor & start, const Scalar & end, int64_t steps, double base, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor logspace(const Scalar & start, const Tensor & end, int64_t steps, double base, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logspace_out(const Scalar & start, const Scalar & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & logspace_out(const Tensor & start, const Tensor & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logspace_out(const Tensor & start, const Scalar & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logspace_out(const Scalar & start, const Tensor & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor log_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & log_softmax_out(const Tensor & self, int64_t dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor log_softmax(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? 
dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _log_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _log_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _log_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype); // {"schema": "aten::_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _log_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype, Tensor & out); // {"schema": "aten::_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _logcumsumexp(const Tensor & self, int64_t dim); // {"schema": "aten::_logcumsumexp(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _logcumsumexp_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logcumsumexp(const Tensor & self, int64_t dim); // {"schema": "aten::logcumsumexp(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logcumsumexp_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor logcumsumexp(const Tensor & self, Dimname dim); // {"schema": "aten::logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & logcumsumexp_out(const Tensor & self, Dimname dim, Tensor & out); // {"schema": "aten::logcumsumexp.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor logsumexp(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logsumexp_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor logsumexp(const Tensor & self, DimnameList dim, bool keepdim); // {"schema": "aten::logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & logsumexp_out(const Tensor & self, DimnameList dim, bool keepdim, Tensor & out); // {"schema": "aten::logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor margin_ranking_loss(const Tensor & input1, const Tensor & input2, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple matmul_backward(const Tensor & grad, const Tensor & self, const Tensor & other, ::std::array mask); // {"schema": "aten::matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor matrix_power(const Tensor & self, int64_t n); // {"schema": "aten::matrix_power(Tensor self, int n) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & matrix_power_out(const Tensor & self, int64_t n, Tensor & out); // {"schema": "aten::matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor matrix_exp(const Tensor & self); // {"schema": "aten::matrix_exp(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor matrix_exp_backward(const Tensor & self, const Tensor & grad); // {"schema": "aten::matrix_exp_backward(Tensor self, Tensor grad) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _aminmax(const Tensor & self); // {"schema": "aten::_aminmax(Tensor self) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _aminmax(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::_aminmax.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple aminmax(const Tensor & self, ::std::optional dim, bool keepdim); // {"schema": "aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max)", "dispatch": "True", "default": "True"} +::std::tuple aminmax_out(const Tensor & self, ::std::optional dim, bool keepdim, Tensor & min, Tensor & max); // {"schema": "aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max)", "dispatch": "True", "default": "False"} +Tensor _compute_linear_combination(const Tensor & input, const Tensor & coefficients); // {"schema": "aten::_compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _compute_linear_combination_out(const Tensor & input, const Tensor & coefficients, Tensor & out); // {"schema": "aten::_compute_linear_combination.out(Tensor input, Tensor coefficients, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple max(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple max_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & max, Tensor & max_values); // {"schema": "aten::max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) 
values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple max(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple max_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & max, Tensor & max_values); // {"schema": "aten::max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor value_selecting_reduction_backward(const Tensor & grad, int64_t dim, const Tensor & indices, c10::SymIntArrayRef sizes, bool keepdim); // {"schema": "aten::value_selecting_reduction_backward(Tensor grad, int dim, Tensor indices, SymInt[] sizes, bool keepdim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor amax(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & amax_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple max_pool1d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor max_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor max_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool2d_backward(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool2d_backward(Tensor grad_output, Tensor output, Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] 
dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool3d_backward(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool3d_backward(Tensor grad_output, Tensor output, Tensor input, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantized_max_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::quantized_max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantized_max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantized_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::quantized_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor mean(const Tensor & self, ::std::optional dtype); // {"schema": "aten::mean(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mean_out(const Tensor & self, ::std::optional dtype, Tensor & out); // {"schema": "aten::mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor mean(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mean_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::mean.out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mean(const Tensor & self, DimnameList dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & mean_out(const Tensor & self, DimnameList dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nanmean(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nanmean_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor median(const Tensor & self); // {"schema": "aten::median(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple median(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple median_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::median.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple median(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple median_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::median.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor nanmedian(const Tensor & self); // {"schema": "aten::nanmedian(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple nanmedian(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple nanmedian_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple nanmedian(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple nanmedian_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) 
indices)", "dispatch": "False", "default": "True"} +::std::tuple min(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple min_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & min, Tensor & min_indices); // {"schema": "aten::min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple min(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple min_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & min, Tensor & min_indices); // {"schema": "aten::min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor amin(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & amin_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _mps_convolution(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mps_convolution_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor mkldnn_convolution(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::mkldnn_convolution(Tensor self, Tensor weight, Tensor? 
bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_rnn_layer(const Tensor & input, const Tensor & weight0, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & hx_, const Tensor & cx_, bool reverse, IntArrayRef batch_sizes, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train); // {"schema": "aten::mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple mkldnn_rnn_layer_backward(const Tensor & input, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & weight4, const Tensor & hx_, const Tensor & cx_tmp, const Tensor & output, const Tensor & hy_, const Tensor & cy_, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, IntArrayRef batch_sizes, bool batch_first, const Tensor & workspace); // {"schema": "aten::mkldnn_rnn_layer_backward(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple miopen_batch_norm(const Tensor & input, const Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); // {"schema": "aten::miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple miopen_batch_norm_backward(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon); // {"schema": "aten::miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor miopen_convolution(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_convolution(Tensor self, Tensor weight, Tensor? 
bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_transpose(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_depthwise_convolution(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_relu(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_add_relu(const Tensor & self, const Tensor & weight, const Tensor & z, const ::std::optional & alpha, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple miopen_rnn(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & hx, const ::std::optional & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const ::std::optional & dropout_state); // {"schema": "aten::miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple> miopen_rnn_backward(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const ::std::optional & cx, const Tensor & output, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const ::std::optional & dropout_state, const Tensor & reserve, ::std::array output_mask); // {"schema": "aten::miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? 
grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])", "dispatch": "True", "default": "False"} +Tensor mm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::mm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _int_mm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::_int_mm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _int_mm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::_int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_weight_to_int4pack(const Tensor & self, int64_t innerKTiles); // {"schema": "aten::_convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _weight_int4pack_mm(const Tensor & self, const Tensor & mat2, int64_t qGroupSize, const Tensor & qScaleAndZeros); // {"schema": "aten::_weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _weight_int8pack_mm(const Tensor & self, const Tensor & mat2, const Tensor & scales); // {"schema": "aten::_weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_mm(const Tensor & sparse, const Tensor & dense); // {"schema": "aten::_sparse_mm(Tensor sparse, Tensor dense) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_mm(const Tensor & sparse, const Tensor & dense, c10::string_view reduce); // {"schema": "aten::_sparse_mm.reduce(Tensor sparse, Tensor dense, str reduce) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sparse_matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::_sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mode(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "False"} +::std::tuple mode_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::mode.values(Tensor self, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"} +::std::tuple mode(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple mode_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::mode.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) 
indices)", "dispatch": "False", "default": "True"} +Tensor mul(const Tensor & self, const Tensor & other); // {"schema": "aten::mul.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mul_(Tensor & self, const Tensor & other); // {"schema": "aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mul(const Tensor & self, const Scalar & other); // {"schema": "aten::mul.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mul_(Tensor & self, const Scalar & other); // {"schema": "aten::mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor multiply(const Tensor & self, const Tensor & other); // {"schema": "aten::multiply.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multiply_(Tensor & self, const Tensor & other); // {"schema": "aten::multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & multiply_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::multiply.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor multiply(const Tensor & self, const Scalar & other); // {"schema": "aten::multiply.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multiply_(Tensor & self, const Scalar & other); // {"schema": "aten::multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor mv(const Tensor & self, const Tensor & vec); // {"schema": "aten::mv(Tensor self, Tensor vec) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mv_out(const Tensor & self, const Tensor & vec, Tensor & out); // {"schema": "aten::mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mvlgamma_out(const Tensor & self, int64_t p, Tensor & out); // {"schema": "aten::mvlgamma.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mvlgamma(const Tensor & self, int64_t p); // {"schema": "aten::mvlgamma(Tensor self, int p) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mvlgamma_(Tensor & self, int64_t p); // {"schema": "aten::mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor narrow_copy(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length); // {"schema": "aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & narrow_copy_out(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length, Tensor & out); // {"schema": "aten::narrow_copy.out(Tensor self, int dim, SymInt start, SymInt length, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor narrow(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length); // {"schema": "aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor narrow(const Tensor & self, int64_t dim, const Tensor & start, c10::SymInt length); // {"schema": "aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)", "dispatch": "False", "default": "True"} +::std::tuple native_batch_norm(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps); // {"schema": "aten::native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple native_batch_norm_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"} +::std::tuple _native_batch_norm_legit(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, Tensor & running_mean, Tensor & running_var, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _native_batch_norm_legit_no_training(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & running_mean, const Tensor & running_var, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit_no_training(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +::std::tuple _native_batch_norm_legit_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, Tensor & running_mean, Tensor & running_var, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::_native_batch_norm_legit.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd) -> (Tensor(d!), Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "False"} +::std::tuple _native_batch_norm_legit(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit.no_stats(Tensor input, Tensor? weight, Tensor? 
bias, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _native_batch_norm_legit_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::_native_batch_norm_legit.no_stats_out(Tensor input, Tensor? weight, Tensor? bias, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"} +::std::tuple batch_norm_stats(const Tensor & input, double eps); // {"schema": "aten::batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor batch_norm_elemt(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & mean, const Tensor & invstd, double eps); // {"schema": "aten::batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & batch_norm_elemt_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & mean, const Tensor & invstd, double eps, Tensor & out); // {"schema": "aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple batch_norm_gather_stats(const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps, int64_t count); // {"schema": "aten::batch_norm_gather_stats(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple batch_norm_gather_stats_with_counts(const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps, const Tensor & counts); // {"schema": "aten::batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple native_batch_norm_backward(const Tensor & grad_out, const Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask); // {"schema": "aten::native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple batch_norm_backward_reduce(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & weight, bool input_g, bool weight_g, bool bias_g); // {"schema": "aten::batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? 
weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor batch_norm_backward_elemt(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & weight, const Tensor & sum_dy, const Tensor & sum_dy_xmu, const Tensor & count); // {"schema": "aten::batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple batch_norm_update_stats(const Tensor & input, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum); // {"schema": "aten::batch_norm_update_stats(Tensor input, Tensor? running_mean, Tensor? running_var, float momentum) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +bool is_vulkan_available(); // {"schema": "aten::is_vulkan_available() -> bool", "dispatch": "False", "default": "True"} +bool _nnpack_available(); // {"schema": "aten::_nnpack_available() -> bool", "dispatch": "False", "default": "True"} +Tensor _nnpack_spatial_convolution(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride); // {"schema": "aten::_nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor ones(IntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor ones(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::ones(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ones_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::ones.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor ones_like(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor pairwise_distance(const Tensor & x1, const Tensor & x2, double p, double eps, bool keepdim); // {"schema": "aten::pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cdist(const Tensor & x1, const Tensor & x2, double p, ::std::optional compute_mode); // {"schema": "aten::cdist(Tensor x1, Tensor x2, float p=2, int? 
compute_mode=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _euclidean_dist(const Tensor & x1, const Tensor & x2); // {"schema": "aten::_euclidean_dist(Tensor x1, Tensor x2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _cdist_forward(const Tensor & x1, const Tensor & x2, double p, ::std::optional compute_mode); // {"schema": "aten::_cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _cdist_backward(const Tensor & grad, const Tensor & x1, const Tensor & x2, double p, const Tensor & cdist); // {"schema": "aten::_cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor", "dispatch": "True", "default": "False"} +Tensor pdist(const Tensor & self, double p); // {"schema": "aten::pdist(Tensor self, float p=2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _pdist_forward(const Tensor & self, double p); // {"schema": "aten::_pdist_forward(Tensor self, float p=2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _pdist_backward(const Tensor & grad, const Tensor & self, double p, const Tensor & pdist); // {"schema": "aten::_pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cosine_similarity(const Tensor & x1, const Tensor & x2, int64_t dim, double eps); // {"schema": "aten::cosine_similarity(Tensor x1, Tensor x2, int dim=1, float eps=1e-08) -> Tensor", "dispatch": "False", "default": "True"} +Tensor permute(const Tensor & self, IntArrayRef dims); // {"schema": "aten::permute(Tensor(a) self, int[] dims) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor movedim(const Tensor & self, IntArrayRef source, IntArrayRef destination); // {"schema": "aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor movedim(const Tensor & self, int64_t source, int64_t destination); // {"schema": "aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor moveaxis(const Tensor & self, IntArrayRef source, IntArrayRef destination); // {"schema": "aten::moveaxis.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor moveaxis(const Tensor & self, int64_t source, int64_t destination); // {"schema": "aten::moveaxis.int(Tensor(a) self, int source, int destination) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor numpy_T(const Tensor & self); // {"schema": "aten::numpy_T(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor matrix_H(const Tensor & self); // {"schema": "aten::matrix_H(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor mT(const Tensor & self); // {"schema": "aten::mT(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor mH(const Tensor & self); // {"schema": "aten::mH(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor adjoint(const Tensor & self); // {"schema": "aten::adjoint(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor pixel_shuffle(const Tensor & self, int64_t upscale_factor); // {"schema": "aten::pixel_shuffle(Tensor self, int upscale_factor) -> Tensor", "dispatch": "True", "default": "True"} +Tensor pixel_unshuffle(const Tensor & self, int64_t downscale_factor); // {"schema": "aten::pixel_unshuffle(Tensor self, int 
downscale_factor) -> Tensor", "dispatch": "True", "default": "True"} +Tensor channel_shuffle(const Tensor & self, c10::SymInt groups); // {"schema": "aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor native_channel_shuffle(const Tensor & self, c10::SymInt groups); // {"schema": "aten::native_channel_shuffle(Tensor self, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +bool is_pinned(const Tensor & self, ::std::optional device); // {"schema": "aten::is_pinned(Tensor self, Device? device=None) -> bool", "dispatch": "True", "default": "True"} +Tensor pin_memory(const Tensor & self, ::std::optional device); // {"schema": "aten::pin_memory(Tensor(a) self, Device? device=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _pin_memory(const Tensor & self, ::std::optional device); // {"schema": "aten::_pin_memory(Tensor self, Device? device=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor pinverse(const Tensor & self, double rcond); // {"schema": "aten::pinverse(Tensor self, float rcond=1e-15) -> Tensor", "dispatch": "False", "default": "True"} +Tensor poisson_nll_loss(const Tensor & input, const Tensor & target, bool log_input, bool full, double eps, int64_t reduction); // {"schema": "aten::poisson_nll_loss(Tensor input, Tensor target, bool log_input, bool full, float eps, int reduction) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rad2deg(const Tensor & self); // {"schema": "aten::rad2deg(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rad2deg_(Tensor & self); // {"schema": "aten::rad2deg_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rad2deg_out(const Tensor & self, Tensor & out); // {"schema": "aten::rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor deg2rad(const Tensor & self); // {"schema": "aten::deg2rad(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & deg2rad_(Tensor & self); // {"schema": "aten::deg2rad_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & deg2rad_out(const Tensor & self, Tensor & out); // {"schema": "aten::deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor scalar_tensor(const Scalar & s, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::rand.names(SymInt[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::rand.generator_with_names(SymInt[] size, *, Generator? generator, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::rand(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::rand.generator(SymInt[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::rand.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, ::std::optional generator, Tensor & out); // {"schema": "aten::rand.generator_out(SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor rand_like(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::rand_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt high, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randint(SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt high, c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randint.generator(SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randint.low(SymInt low, SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randint.low_generator(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt high, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randint.out(SymInt high, SymInt[] size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt high, c10::SymIntArrayRef size, ::std::optional generator, Tensor & out); // {"schema": "aten::randint.generator_out(SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randint.low_out(SymInt low, SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, ::std::optional generator, Tensor & out); // {"schema": "aten::randint.low_generator_out(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor randint_like(const Tensor & self, c10::SymInt high, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::randint_like(Tensor self, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint_like(const Tensor & self, c10::SymInt low, c10::SymInt high, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randn(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randn.generator(SymInt[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randn.names(SymInt[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randn.generator_with_names(SymInt[] size, *, Generator? generator, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randn.out(SymInt[] size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, ::std::optional generator, Tensor & out); // {"schema": "aten::randn.generator_out(SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor randn_like(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::randn_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randperm(c10::SymInt n, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randperm(SymInt n, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randperm(c10::SymInt n, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::randperm.generator(SymInt n, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randperm_out(c10::SymInt n, Tensor & out); // {"schema": "aten::randperm.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randperm_out(c10::SymInt n, ::std::optional generator, Tensor & out); // {"schema": "aten::randperm.generator_out(SymInt n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor range(const Scalar & start, const Scalar & end, const Scalar & step, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::range.step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor range(const Scalar & start, const Scalar & end, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & range_out(const Scalar & start, const Scalar & end, Tensor & out); // {"schema": "aten::range.out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & range_out(const Scalar & start, const Scalar & end, const Scalar & step, Tensor & out); // {"schema": "aten::range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ravel(const Tensor & self); // {"schema": "aten::ravel(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor reciprocal(const Tensor & self); // {"schema": "aten::reciprocal(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reciprocal_(Tensor & self); // {"schema": "aten::reciprocal_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & reciprocal_out(const Tensor & self, Tensor & out); // {"schema": "aten::reciprocal.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor neg(const Tensor & self); // {"schema": "aten::neg(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & neg_(Tensor & self); // {"schema": "aten::neg_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & neg_out(const Tensor & self, Tensor & out); // {"schema": "aten::neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor negative(const Tensor & self); // {"schema": "aten::negative(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & negative_(Tensor & self); // {"schema": "aten::negative_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & negative_out(const Tensor & self, Tensor & out); // {"schema": "aten::negative.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor repeat(const Tensor & self, c10::SymIntArrayRef repeats); // {"schema": "aten::repeat(Tensor self, SymInt[] repeats) -> Tensor", "dispatch": "True", "default": "True"} +Tensor repeat_interleave(const Tensor & repeats, ::std::optional output_size); // {"schema": "aten::repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor repeat_interleave(const Tensor & self, const Tensor & repeats, ::std::optional dim, ::std::optional output_size); // {"schema": "aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor repeat_interleave(const Tensor & self, c10::SymInt repeats, ::std::optional dim, ::std::optional output_size); // {"schema": "aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor reshape(const Tensor & self, c10::SymIntArrayRef shape); // {"schema": "aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _reshape_copy(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::_reshape_copy(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _reshape_alias(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _mkldnn_reshape(const Tensor & self, IntArrayRef shape); // {"schema": "aten::_mkldnn_reshape(Tensor self, int[] shape) -> Tensor", "dispatch": "True", "default": "False"} +Tensor reshape_as(const Tensor & self, const Tensor & other); // {"schema": "aten::reshape_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor round(const Tensor & self); // {"schema": "aten::round(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & round_(Tensor & self); // {"schema": "aten::round_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & round_out(const Tensor & self, Tensor & out); // {"schema": "aten::round.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor round(const Tensor & self, int64_t decimals); // {"schema": "aten::round.decimals(Tensor self, *, int decimals) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & round_(Tensor & self, int64_t decimals); // {"schema": "aten::round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & round_out(const Tensor & self, int64_t decimals, Tensor & out); // {"schema": "aten::round.decimals_out(Tensor self, *, int decimals, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor rrelu(const Tensor & self, const Scalar & lower, const Scalar & upper, bool training, ::std::optional generator); // {"schema": "aten::rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & rrelu_(Tensor & self, const Scalar & lower, const Scalar & upper, bool training, ::std::optional generator); // {"schema": "aten::rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor relu(const Tensor & self); // {"schema": "aten::relu(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & relu_(Tensor & self); // {"schema": "aten::relu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor relu6(const Tensor & self); // {"schema": "aten::relu6(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & relu6_(Tensor & self); // {"schema": "aten::relu6_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor prelu(const Tensor & self, const Tensor & weight); // {"schema": "aten::prelu(Tensor self, Tensor weight) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _prelu_kernel(const Tensor & self, const Tensor & weight); // {"schema": "aten::_prelu_kernel(Tensor self, Tensor weight) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _prelu_kernel_backward(const Tensor & grad_output, const Tensor & self, const Tensor & weight); // {"schema": "aten::_prelu_kernel_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & gelu_out(const Tensor & self, c10::string_view approximate, Tensor & out); // {"schema": "aten::gelu.out(Tensor self, *, str approximate='none', Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & gelu_(Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu_(Tensor(a!) self, *, str approximate='none') -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor gelu(const Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu(Tensor self, *, str approximate='none') -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gelu_backward_out(const Tensor & grad_output, const Tensor & self, c10::string_view approximate, Tensor & grad_input); // {"schema": "aten::gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gelu_backward(const Tensor & grad_output, const Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu_backward(Tensor grad_output, Tensor self, *, str approximate='none') -> Tensor", "dispatch": "True", "default": "True"} +Tensor infinitely_differentiable_gelu_backward(const Tensor & grad, const Tensor & self); // {"schema": "aten::infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & hardshrink_out(const Tensor & self, const Scalar & lambd, Tensor & out); // {"schema": "aten::hardshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardshrink(const Tensor & self, const Scalar & lambd); // {"schema": "aten::hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardshrink_backward_out(const Tensor & grad_out, const Tensor & self, const Scalar & lambd, Tensor & grad_input); // {"schema": "aten::hardshrink_backward.grad_input(Tensor grad_out, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardshrink_backward(const Tensor & grad_out, const Tensor & self, const Scalar & lambd); // {"schema": "aten::hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rsqrt(const Tensor & self); // {"schema": "aten::rsqrt(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rsqrt_(Tensor & self); // {"schema": "aten::rsqrt_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsqrt_out(const Tensor & self, Tensor & out); // {"schema": "aten::rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor select(const Tensor & self, Dimname dim, int64_t index); // {"schema": "aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor select(const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor select_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index); // {"schema": "aten::select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_select_backward(const Tensor & grad_output, const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::_nested_select_backward(Tensor grad_output, Tensor self, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor selu(const Tensor & self); // {"schema": "aten::selu(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & selu_(Tensor & self); // {"schema": "aten::selu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor celu(const Tensor & self, const Scalar & alpha); // {"schema": "aten::celu(Tensor self, Scalar alpha=1.0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & celu_(Tensor & self, const Scalar & alpha); // {"schema": "aten::celu_(Tensor(a!) 
self, Scalar alpha=1.0) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor silu(const Tensor & self); // {"schema": "aten::silu(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & silu_(Tensor & self); // {"schema": "aten::silu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & silu_out(const Tensor & self, Tensor & out); // {"schema": "aten::silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & silu_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor silu_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::silu_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor mish(const Tensor & self); // {"schema": "aten::mish(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mish_(Tensor & self); // {"schema": "aten::mish_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mish_out(const Tensor & self, Tensor & out); // {"schema": "aten::mish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mish_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::mish_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sigmoid(const Tensor & self); // {"schema": "aten::sigmoid(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sigmoid_(Tensor & self); // {"schema": "aten::sigmoid_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logit(const Tensor & self, ::std::optional eps); // {"schema": "aten::logit(Tensor self, float? eps=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & logit_(Tensor & self, ::std::optional eps); // {"schema": "aten::logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & logit_out(const Tensor & self, ::std::optional eps, Tensor & out); // {"schema": "aten::logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sin(const Tensor & self); // {"schema": "aten::sin(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sin_(Tensor & self); // {"schema": "aten::sin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sin_out(const Tensor & self, Tensor & out); // {"schema": "aten::sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sinc(const Tensor & self); // {"schema": "aten::sinc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sinc_(Tensor & self); // {"schema": "aten::sinc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sinc_out(const Tensor & self, Tensor & out); // {"schema": "aten::sinc.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sinh(const Tensor & self); // {"schema": "aten::sinh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sinh_(Tensor & self); // {"schema": "aten::sinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor detach(const Tensor & self); // {"schema": "aten::detach(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & detach_(Tensor & self); // {"schema": "aten::detach_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +int64_t size(const Tensor & self, int64_t dim); // {"schema": "aten::size.int(Tensor self, int dim) -> int", "dispatch": "False", "default": "True"} +int64_t size(const Tensor & self, Dimname dim); // {"schema": "aten::size.Dimname(Tensor self, Dimname dim) -> int", "dispatch": "False", "default": "True"} +c10::SymInt sym_size(const Tensor & self, int64_t dim); // {"schema": "aten::sym_size.int(Tensor self, int dim) -> SymInt", "dispatch": "False", "default": "True"} +c10::SymInt sym_numel(const Tensor & self); // {"schema": "aten::sym_numel(Tensor self) -> SymInt", "dispatch": "False", "default": "True"} +c10::SymInt sym_storage_offset(const Tensor & self); // {"schema": "aten::sym_storage_offset(Tensor self) -> SymInt", "dispatch": "False", "default": "True"} +Tensor slice(const Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); // {"schema": "aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor slice_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step); // {"schema": "aten::slice_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step) -> Tensor", "dispatch": "True", "default": "True"} +Tensor slice_inverse(const Tensor & self, const Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); // {"schema": "aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor slice_scatter(const Tensor & self, const Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); // {"schema": "aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor select_scatter(const Tensor & self, const Tensor & src, int64_t dim, c10::SymInt index); // {"schema": "aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"} +Tensor diagonal_scatter(const Tensor & self, const Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor as_strided_scatter(const Tensor & self, const Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); // {"schema": "aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? 
storage_offset=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor smm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::smm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & softmax_out(const Tensor & self, int64_t dim, ::std::optional dtype, Tensor & out); // {"schema": "aten::softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor softmax(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype); // {"schema": "aten::_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype, Tensor & grad_input); // {"schema": "aten::_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> unsafe_split(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> split(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> split(const Tensor & self, c10::SymIntArrayRef split_size, int64_t dim); // {"schema": "aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> unsafe_split_with_sizes(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> split_with_sizes(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> hsplit(const Tensor & self, int64_t sections); // {"schema": "aten::hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> hsplit(const Tensor & self, IntArrayRef indices); // {"schema": "aten::hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> vsplit(const Tensor & self, int64_t sections); // {"schema": "aten::vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> vsplit(const Tensor & self, IntArrayRef indices); // {"schema": "aten::vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> dsplit(const Tensor & self, int64_t sections); // {"schema": "aten::dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> dsplit(const Tensor & self, IntArrayRef indices); // {"schema": "aten::dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"}
+Tensor squeeze(const Tensor & self); // {"schema": "aten::squeeze(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor squeeze(const Tensor & self, int64_t dim); // {"schema": "aten::squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor squeeze(const Tensor & self, Dimname dim); // {"schema": "aten::squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor squeeze(const Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze.dims(Tensor(a) self, int[] dim) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor & squeeze_(Tensor & self); // {"schema": "aten::squeeze_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & squeeze_(Tensor & self, int64_t dim); // {"schema": "aten::squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & squeeze_(Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze_.dims(Tensor(a!)
self, int[] dim) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_(Tensor & self, Dimname dim); // {"schema": "aten::squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor sspaddmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & sspaddmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::sspaddmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _chunk_cat(TensorList tensors, int64_t dim, int64_t num_chunks); // {"schema": "aten::_chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _chunk_cat_out(TensorList tensors, int64_t dim, int64_t num_chunks, Tensor & out); // {"schema": "aten::_chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor stack(TensorList tensors, int64_t dim); // {"schema": "aten::stack(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & stack_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _stack(TensorList tensors, int64_t dim); // {"schema": "aten::_stack(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _stack_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor hstack(TensorList tensors); // {"schema": "aten::hstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & hstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::hstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor vstack(TensorList tensors); // {"schema": "aten::vstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & vstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::vstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor dstack(TensorList tensors); // {"schema": "aten::dstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & dstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor stft(const Tensor & self, int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided, ::std::optional return_complex); // {"schema": "aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? 
return_complex=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor stft(const Tensor & self, int64_t n_fft, ::std::optional<int64_t> hop_length, ::std::optional<int64_t> win_length, const ::std::optional<Tensor> & window, bool center, c10::string_view pad_mode, bool normalized, ::std::optional<bool> onesided, ::std::optional<bool> return_complex); // {"schema": "aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode=\"reflect\", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor istft(const Tensor & self, int64_t n_fft, ::std::optional<int64_t> hop_length, ::std::optional<int64_t> win_length, const ::std::optional<Tensor> & window, bool center, bool normalized, ::std::optional<bool> onesided, ::std::optional<int64_t> length, bool return_complex); // {"schema": "aten::istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor", "dispatch": "False", "default": "True"}
+int64_t stride(const Tensor & self, int64_t dim); // {"schema": "aten::stride.int(Tensor self, int dim) -> int", "dispatch": "False", "default": "True"}
+int64_t stride(const Tensor & self, Dimname dim); // {"schema": "aten::stride.Dimname(Tensor self, Dimname dim) -> int", "dispatch": "False", "default": "True"}
+c10::SymInt sym_stride(const Tensor & self, int64_t dim); // {"schema": "aten::sym_stride.int(Tensor self, int dim) -> SymInt", "dispatch": "False", "default": "True"}
+Tensor sum(const Tensor & self, ::std::optional<ScalarType> dtype); // {"schema": "aten::sum(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor sum(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional<ScalarType> dtype); // {"schema": "aten::sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor sum(const Tensor & self, DimnameList dim, bool keepdim, ::std::optional<ScalarType> dtype); // {"schema": "aten::sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & sum_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & sum_out(const Tensor & self, DimnameList dim, bool keepdim, ::std::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor _nested_sum_backward(const Tensor & grad, const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::_nested_sum_backward(Tensor grad, Tensor self, int[1]? dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor nansum(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional<ScalarType> dtype); // {"schema": "aten::nansum(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType?
dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & nansum_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::nansum.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sum_to_size(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sqrt(const Tensor & self); // {"schema": "aten::sqrt(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sqrt_(Tensor & self); // {"schema": "aten::sqrt_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sqrt_out(const Tensor & self, Tensor & out); // {"schema": "aten::sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor square(const Tensor & self); // {"schema": "aten::square(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & square_(Tensor & self); // {"schema": "aten::square_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & square_out(const Tensor & self, Tensor & out); // {"schema": "aten::square.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor std(const Tensor & self, bool unbiased); // {"schema": "aten::std(Tensor self, bool unbiased=True) -> Tensor", "dispatch": "False", "default": "True"} +Tensor std(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::std.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor std(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple std_mean(const Tensor & self, bool unbiased); // {"schema": "aten::std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple std_mean(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::std_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple std_mean(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple std_mean(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple std_mean(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::std_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor & std_out(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::std.out(Tensor self, int[1]? 
dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & std_out(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor std(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & std_out(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor std(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & std_out(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::std.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor prod(const Tensor & self, ::std::optional dtype); // {"schema": "aten::prod(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor prod(const Tensor & self, int64_t dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & prod_out(const Tensor & self, int64_t dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor prod(const Tensor & self, Dimname dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & prod_out(const Tensor & self, Dimname dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::prod.Dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor t(const Tensor & self); // {"schema": "aten::t(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & t_(Tensor & self); // {"schema": "aten::t_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor tan(const Tensor & self); // {"schema": "aten::tan(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tan_(Tensor & self); // {"schema": "aten::tan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & tan_out(const Tensor & self, Tensor & out); // {"schema": "aten::tan.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tanh(const Tensor & self); // {"schema": "aten::tanh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tanh_(Tensor & self); // {"schema": "aten::tanh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & tanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tensordot(const Tensor & self, const Tensor & other, IntArrayRef dims_self, IntArrayRef dims_other); // {"schema": "aten::tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & tensordot_out(const Tensor & self, const Tensor & other, IntArrayRef dims_self, IntArrayRef dims_other, Tensor & out); // {"schema": "aten::tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor threshold(const Tensor & self, const Scalar & threshold, const Scalar & value); // {"schema": "aten::threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & threshold_(Tensor & self, const Scalar & threshold, const Scalar & value); // {"schema": "aten::threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & threshold_out(const Tensor & self, const Scalar & threshold, const Scalar & value, Tensor & out); // {"schema": "aten::threshold.out(Tensor self, Scalar threshold, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & threshold_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & threshold, Tensor & grad_input); // {"schema": "aten::threshold_backward.grad_input(Tensor grad_output, Tensor self, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor threshold_backward(const Tensor & grad_output, const Tensor & self, const Scalar & threshold); // {"schema": "aten::threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor", "dispatch": "True", "default": "True"} +Tensor tile(const Tensor & self, c10::SymIntArrayRef dims); // {"schema": "aten::tile(Tensor self, SymInt[] dims) -> Tensor", "dispatch": "False", "default": "True"} +Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor transpose(const Tensor & self, Dimname dim0, Dimname dim1); // {"schema": "aten::transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _mkldnn_transpose(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::_mkldnn_transpose(Tensor self, int dim0, int dim1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & transpose_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mkldnn_transpose_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::_mkldnn_transpose_(Tensor(a!) 
self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor one_hot(const Tensor & self, int64_t num_classes); // {"schema": "aten::one_hot(Tensor self, int num_classes=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor flip(const Tensor & self, IntArrayRef dims); // {"schema": "aten::flip(Tensor self, int[] dims) -> Tensor", "dispatch": "True", "default": "False"} +Tensor fliplr(const Tensor & self); // {"schema": "aten::fliplr(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor flipud(const Tensor & self); // {"schema": "aten::flipud(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor roll(const Tensor & self, c10::SymIntArrayRef shifts, IntArrayRef dims); // {"schema": "aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor", "dispatch": "True", "default": "False"} +Tensor rot90(const Tensor & self, int64_t k, IntArrayRef dims); // {"schema": "aten::rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor", "dispatch": "True", "default": "True"} +Tensor trapezoid(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::trapezoid.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor trapezoid(const Tensor & y, const Scalar & dx, int64_t dim); // {"schema": "aten::trapezoid.dx(Tensor y, *, Scalar dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor trapz(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::trapz.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor trapz(const Tensor & y, double dx, int64_t dim); // {"schema": "aten::trapz.dx(Tensor y, *, float dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _transform_bias_rescale_qkv(const Tensor & qkv, const Tensor & qkv_bias, int64_t num_heads); // {"schema": "aten::_transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_from_mask(const Tensor & t, const Tensor & mask, bool mask_check); // {"schema": "aten::_nested_tensor_from_mask(Tensor t, Tensor mask, bool mask_check=True) -> Tensor", "dispatch": "True", "default": "False"} +bool _nested_tensor_from_mask_left_aligned(const Tensor & t, const Tensor & mask); // {"schema": "aten::_nested_tensor_from_mask_left_aligned(Tensor t, Tensor mask) -> bool", "dispatch": "True", "default": "False"} +Tensor _nested_from_padded(const Tensor & padded, const Tensor & cpu_nested_shape_example, bool fuse_transform_0213); // {"schema": "aten::_nested_from_padded(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_size(const Tensor & self); // {"schema": "aten::_nested_tensor_size(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_strides(const Tensor & self); // {"schema": "aten::_nested_tensor_strides(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_storage_offsets(const Tensor & self); // {"schema": "aten::_nested_tensor_storage_offsets(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_from_padded_and_nested_example(const Tensor & padded, const Tensor & nt_example); // {"schema": "aten::_nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor", "dispatch": "True", "default": "False"} 
+Tensor _nested_view_from_buffer(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets); // {"schema": "aten::_nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _nested_view_from_buffer_copy(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets); // {"schema": "aten::_nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_view_from_jagged(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); // {"schema": "aten::_nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _nested_view_from_jagged_copy(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); // {"schema": "aten::_nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_get_values(const Tensor & self); // {"schema": "aten::_nested_get_values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _nested_get_values_copy(const Tensor & self); // {"schema": "aten::_nested_get_values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_get_offsets(const Tensor & self); // {"schema": "aten::_nested_get_offsets(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_get_lengths(const Tensor & self); // {"schema": "aten::_nested_get_lengths(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +int64_t _nested_get_ragged_idx(const Tensor & self); // {"schema": "aten::_nested_get_ragged_idx(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor _nested_get_min_seqlen(const Tensor & self); // {"schema": "aten::_nested_get_min_seqlen(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_get_max_seqlen(const Tensor & self); // {"schema": "aten::_nested_get_max_seqlen(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_get_jagged_dummy(const Tensor & any); // {"schema": "aten::_nested_get_jagged_dummy(Tensor any) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _nested_compute_contiguous_strides_offsets(const Tensor & nested_size); // {"schema": "aten::_nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _trilinear(const Tensor & i1, const Tensor & i2, const Tensor & i3, IntArrayRef expand1, IntArrayRef expand2, IntArrayRef expand3, IntArrayRef sumdim, int64_t unroll_dim); // {"schema": "aten::_trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor triplet_margin_loss(const Tensor & anchor, const Tensor & positive, const Tensor & negative, 
double margin, double p, double eps, bool swap, int64_t reduction); // {"schema": "aten::triplet_margin_loss(Tensor anchor, Tensor positive, Tensor negative, float margin=1.0, float p=2, float eps=1e-06, bool swap=False, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor trunc(const Tensor & self); // {"schema": "aten::trunc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & trunc_(Tensor & self); // {"schema": "aten::trunc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & trunc_out(const Tensor & self, Tensor & out); // {"schema": "aten::trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor fix(const Tensor & self); // {"schema": "aten::fix(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fix_(Tensor & self); // {"schema": "aten::fix_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & fix_out(const Tensor & self, Tensor & out); // {"schema": "aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor type_as(const Tensor & self, const Tensor & other); // {"schema": "aten::type_as(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+bool _has_compatible_shallow_copy_type(const Tensor & self, const Tensor & from); // {"schema": "aten::_has_compatible_shallow_copy_type(Tensor self, Tensor from) -> bool", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> _unique(const Tensor & self, bool sorted, bool return_inverse); // {"schema": "aten::_unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> unique_dim(const Tensor & self, int64_t dim, bool sorted, bool return_inverse, bool return_counts); // {"schema": "aten::unique_dim(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> unique_consecutive(const Tensor & self, bool return_inverse, bool return_counts, ::std::optional<int64_t> dim); // {"schema": "aten::unique_consecutive(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> unique_dim_consecutive(const Tensor & self, int64_t dim, bool return_inverse, bool return_counts); // {"schema": "aten::unique_dim_consecutive(Tensor self, int dim, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _unique2(const Tensor & self, bool sorted, bool return_inverse, bool return_counts); // {"schema": "aten::_unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _unsafe_view(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor unsqueeze(const Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor & unsqueeze_(Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze_(Tensor(a!)
self, int dim) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor vander(const Tensor & x, ::std::optional N, bool increasing); // {"schema": "aten::vander(Tensor x, int? N=None, bool increasing=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, bool unbiased); // {"schema": "aten::var(Tensor self, bool unbiased=True) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::var.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & var_out(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::var.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor var(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::var.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, bool unbiased); // {"schema": "aten::var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::var_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? 
correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple var_mean(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, DimnameList dim, const ::std::optional & correction, bool keepdim); // {"schema": "aten::var_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor view_as(const Tensor & self, const Tensor & other); // {"schema": "aten::view_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Tensor & self, const Tensor & other); // {"schema": "aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & where_out(const Tensor & condition, const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor where(const Tensor & condition, const Scalar & self, const Tensor & other); // {"schema": "aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Tensor & self, const Scalar & other); // {"schema": "aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Scalar & self, const Scalar & other); // {"schema": "aten::where.Scalar(Tensor condition, Scalar self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector where(const Tensor & condition); // {"schema": "aten::where(Tensor condition) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor norm_except_dim(const Tensor & v, int64_t pow, int64_t dim); // {"schema": "aten::norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _weight_norm(const Tensor & v, const Tensor & g, int64_t dim); // {"schema": "aten::_weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _weight_norm_interface(const Tensor & v, const Tensor & g, int64_t dim); // {"schema": "aten::_weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _weight_norm_interface_backward(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim); // {"schema": "aten::_weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _weight_norm_differentiable_backward(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim); // {"schema": "aten::_weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor zeros(IntArrayRef size, ::std::optional names, ::std::optional dtype, ::std::optional layout, ::std::optional device, 
::std::optional pin_memory); // {"schema": "aten::zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _efficientzerotensor(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor zeros(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & zeros_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::zeros.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor zeros_like(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); // {"schema": "aten::zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _standard_gamma_grad(const Tensor & self, const Tensor & output); // {"schema": "aten::_standard_gamma_grad(Tensor self, Tensor output) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _standard_gamma(const Tensor & self, ::std::optional generator); // {"schema": "aten::_standard_gamma(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _dirichlet_grad(const Tensor & x, const Tensor & alpha, const Tensor & total); // {"schema": "aten::_dirichlet_grad(Tensor x, Tensor alpha, Tensor total) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sample_dirichlet(const Tensor & self, ::std::optional generator); // {"schema": "aten::_sample_dirichlet(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor poisson(const Tensor & self, ::std::optional generator); // {"schema": "aten::poisson(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor binomial(const Tensor & count, const Tensor & prob, ::std::optional generator); // {"schema": "aten::binomial(Tensor count, Tensor prob, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor native_norm(const Tensor & self, const Scalar & p); // {"schema": "aten::native_norm(Tensor self, Scalar p=2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor native_norm(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::native_norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _batch_norm_with_update(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, Tensor & running_mean, Tensor & running_var, double momentum, double eps); // {"schema": "aten::_batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) 
running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _batch_norm_with_update_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, Tensor & running_mean, Tensor & running_var, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd, Tensor & reserve); // {"schema": "aten::_batch_norm_with_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd, Tensor(g!) reserve) -> (Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))", "dispatch": "True", "default": "False"} +::std::tuple _batch_norm_no_update(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps); // {"schema": "aten::_batch_norm_no_update(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_backward(const Tensor & grad_out, const Tensor & input, const Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, bool update, double eps, ::std::array output_mask, const Tensor & reserve); // {"schema": "aten::batch_norm_backward(Tensor grad_out, Tensor input, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, bool update, float eps, bool[3] output_mask, Tensor reserve) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _sparse_sum(const Tensor & self); // {"schema": "aten::_sparse_sum(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum(const Tensor & self, ScalarType dtype); // {"schema": "aten::_sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum(const Tensor & self, IntArrayRef dim); // {"schema": "aten::_sparse_sum.dim(Tensor self, int[1] dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _sparse_sum(const Tensor & self, IntArrayRef dim, ScalarType dtype); // {"schema": "aten::_sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum_backward(const Tensor & grad, const Tensor & self, IntArrayRef dim); // {"schema": "aten::_sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_csr_sum(const Tensor & self, IntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_csr_prod(const Tensor & self, IntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::_sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::_sparse_softmax.int(Tensor self, int dim, ScalarType? 
dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_softmax(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::_sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self); // {"schema": "aten::_sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_log_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::_sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_log_softmax(const Tensor & self, Dimname dim, ::std::optional dtype); // {"schema": "aten::_sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_log_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_log_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self); // {"schema": "aten::_sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _spdiags(const Tensor & diagonals, const Tensor & offsets, IntArrayRef shape, ::std::optional layout); // {"schema": "aten::_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor norm(const Tensor & self, const ::std::optional & p, ScalarType dtype); // {"schema": "aten::norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const Scalar & p); // {"schema": "aten::norm.Scalar(Tensor self, Scalar p=2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim, ScalarType dtype); // {"schema": "aten::norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim); // {"schema": "aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & norm_out(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor norm(const Tensor & self, const ::std::optional & p, DimnameList dim, bool keepdim, ScalarType dtype); // {"schema": "aten::norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor norm(const Tensor & self, const ::std::optional & p, DimnameList dim, bool keepdim); // {"schema": "aten::norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & norm_out(const Tensor & self, const ::std::optional & p, DimnameList dim, bool keepdim, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.names_dtype_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & norm_out(const Tensor & self, const ::std::optional & p, DimnameList dim, bool keepdim, Tensor & out); // {"schema": "aten::norm.names_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple frexp(const Tensor & self); // {"schema": "aten::frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent)", "dispatch": "True", "default": "True"} +::std::tuple frexp_out(const Tensor & self, Tensor & mantissa, Tensor & exponent); // {"schema": "aten::frexp.Tensor_out(Tensor self, *, Tensor(a!) mantissa, Tensor(b!) exponent) -> (Tensor(a!) mantissa, Tensor(b!) exponent)", "dispatch": "True", "default": "False"} +Tensor frobenius_norm(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & frobenius_norm_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::frobenius_norm.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nuclear_norm(const Tensor & self, bool keepdim); // {"schema": "aten::nuclear_norm(Tensor self, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nuclear_norm_out(const Tensor & self, bool keepdim, Tensor & out); // {"schema": "aten::nuclear_norm.out(Tensor self, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nuclear_norm(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::nuclear_norm.dim(Tensor self, int[2] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nuclear_norm_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor clone(const Tensor & self, ::std::optional memory_format); // {"schema": "aten::clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor positive(const Tensor & self); // {"schema": "aten::positive(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +const Tensor & resize_as_(const Tensor & self, const Tensor & the_template, ::std::optional memory_format); // {"schema": "aten::resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? 
memory_format=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_as_sparse_(const Tensor & self, const Tensor & the_template); // {"schema": "aten::resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & zero_(Tensor & self); // {"schema": "aten::zero_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & sub_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sub(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sub(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & subtract_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor subtract(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::subtract.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & subtract_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::subtract_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor subtract(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::subtract.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & subtract_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::subtract_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor rsub(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & heaviside_out(const Tensor & self, const Tensor & values, Tensor & out); // {"schema": "aten::heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor heaviside(const Tensor & self, const Tensor & values); // {"schema": "aten::heaviside(Tensor self, Tensor values) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & heaviside_(Tensor & self, const Tensor & values); // {"schema": "aten::heaviside_(Tensor(a!) 
self, Tensor values) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor rsub(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _sparse_addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::_sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sparse_sampled_addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sparse_sampled_addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _sparse_mm_reduce_impl(const Tensor & self, const Tensor & other, c10::string_view reduce); // {"schema": "aten::_sparse_mm_reduce_impl(Tensor self, Tensor other, str reduce) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _sparse_mm_reduce_impl_backward(const Tensor & self, const Tensor & grad_out, const Tensor & weight, c10::string_view reduce, const Tensor & arg_out, ::std::array output_mask); // {"schema": "aten::_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addmm_(Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _addmm_activation_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, bool use_gelu, Tensor & out); // {"schema": "aten::_addmm_activation.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _addmm_activation(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, bool use_gelu); // {"schema": "aten::_addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _scaled_mm(const Tensor & self, const Tensor & mat2, const Tensor & scale_a, const Tensor & scale_b, const ::std::optional & bias, const ::std::optional & scale_result, ::std::optional out_dtype, bool use_fast_accum); // {"schema": "aten::_scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _scaled_mm_out(const Tensor & self, const Tensor & mat2, const Tensor & scale_a, const Tensor & scale_b, const ::std::optional & bias, const ::std::optional & scale_result, ::std::optional out_dtype, bool use_fast_accum, Tensor & out); // {"schema": "aten::_scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, IntArrayRef size, IntArrayRef blocksize, ScalarType index_dtype, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_compressed_tensor(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_csr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_csc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_compressed_tensor(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_csr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_csc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_compressed_tensor_unsafe(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_csr_tensor_unsafe(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_csc_tensor_unsafe(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_bsr_tensor_unsafe(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_bsc_tensor_unsafe(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_coo_tensor(IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::sparse_coo_tensor.size(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); // {"schema": "aten::sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? 
is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); // {"schema": "aten::sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_coo_tensor_unsafe(const Tensor & indices, const Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); // {"schema": "aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +void _validate_sparse_coo_tensor_args(const Tensor & indices, const Tensor & values, IntArrayRef size, ::std::optional is_coalesced); // {"schema": "aten::_validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_compressed_tensor_args(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, IntArrayRef size, Layout layout); // {"schema": "aten::_validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_csr_tensor_args(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_csc_tensor_args(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_bsr_tensor_args(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_bsc_tensor_args(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const Tensor & indices, const Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); // {"schema": "aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor", "dispatch": "True", "default": "False"} +const Tensor & sparse_resize_(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)", "dispatch": "True", "default": "False"} +const Tensor & sparse_resize_and_clear_(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sparse_mask(const Tensor & self, const Tensor & mask); // {"schema": "aten::sparse_mask(Tensor self, Tensor mask) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_mask_projection(const Tensor & self, const Tensor & mask, bool accumulate_matches); // {"schema": "aten::_sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector _to_cpu(TensorList tensors); // {"schema": "aten::_to_cpu(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor to_dense(const Tensor & self, ::std::optional dtype, ::std::optional masked_grad); // {"schema": "aten::to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_dense(const Tensor & self, ::std::optional dtype, ::std::optional masked_grad); // {"schema": "aten::_to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_dense_backward(const Tensor & grad, const Tensor & input, ::std::optional masked_grad); // {"schema": "aten::to_dense_backward(Tensor grad, Tensor input, bool? 
masked_grad=None) -> Tensor", "dispatch": "False", "default": "True"} +int64_t sparse_dim(const Tensor & self); // {"schema": "aten::sparse_dim(Tensor self) -> int", "dispatch": "True", "default": "True"} +int64_t _dimI(const Tensor & self); // {"schema": "aten::_dimI(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t dense_dim(const Tensor & self); // {"schema": "aten::dense_dim(Tensor self) -> int", "dispatch": "True", "default": "True"} +int64_t _dimV(const Tensor & self); // {"schema": "aten::_dimV(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t _nnz(const Tensor & self); // {"schema": "aten::_nnz(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor coalesce(const Tensor & self); // {"schema": "aten::coalesce(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _coalesce(const Tensor & self); // {"schema": "aten::_coalesce(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +bool is_coalesced(const Tensor & self); // {"schema": "aten::is_coalesced(Tensor self) -> bool", "dispatch": "True", "default": "True"} +Tensor _indices(const Tensor & self); // {"schema": "aten::_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _values(const Tensor & self); // {"schema": "aten::_values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor & _coalesced_(Tensor & self, bool coalesced); // {"schema": "aten::_coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor indices(const Tensor & self); // {"schema": "aten::indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor values(const Tensor & self); // {"schema": "aten::values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor crow_indices(const Tensor & self); // {"schema": "aten::crow_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor col_indices(const Tensor & self); // {"schema": "aten::col_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor ccol_indices(const Tensor & self); // {"schema": "aten::ccol_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor row_indices(const Tensor & self); // {"schema": "aten::row_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & hspmm_out(const Tensor & mat1, const Tensor & mat2, Tensor & out); // {"schema": "aten::hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hspmm(const Tensor & mat1, const Tensor & mat2); // {"schema": "aten::hspmm(Tensor mat1, Tensor mat2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & copy_sparse_to_sparse_(Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_sparse_to_sparse_(Tensor(a!) 
self, Tensor src, bool non_blocking=False) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector unbind(const Tensor & self, int64_t dim); // {"schema": "aten::unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector unbind(const Tensor & self, Dimname dim); // {"schema": "aten::unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +Tensor to_sparse(const Tensor & self, int64_t sparse_dim); // {"schema": "aten::to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse(const Tensor & self, int64_t sparse_dim); // {"schema": "aten::_to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse(const Tensor & self, ::std::optional layout, OptionalIntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse(const Tensor & self, ::std::optional layout, OptionalIntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::_to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_csr(const Tensor & self, ::std::optional dense_dim); // {"schema": "aten::to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_csr(const Tensor & self, ::std::optional dense_dim); // {"schema": "aten::_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_csc(const Tensor & self, ::std::optional dense_dim); // {"schema": "aten::to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_csc(const Tensor & self, ::std::optional dense_dim); // {"schema": "aten::_to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_bsr(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_bsr(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::_to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_bsc(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_bsc(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim); // {"schema": "aten::_to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _to_sparse_semi_structured(const Tensor & dense); // {"schema": "aten::_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor to_mkldnn(const Tensor & self, ::std::optional dtype); // {"schema": "aten::to_mkldnn(Tensor self, ScalarType? 
dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_reorder_conv2d_weight(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size); // {"schema": "aten::mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_reorder_conv3d_weight(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size); // {"schema": "aten::mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_mkldnn_backward(const Tensor & grad, const Tensor & input); // {"schema": "aten::to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantize_per_tensor_dynamic(const Tensor & self, ScalarType dtype, bool reduce_range); // {"schema": "aten::quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantize_per_tensor(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype); // {"schema": "aten::quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantize_per_tensor(const Tensor & self, const Tensor & scale, const Tensor & zero_point, ScalarType dtype); // {"schema": "aten::quantize_per_tensor.tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector quantize_per_tensor(TensorList tensors, const Tensor & scales, const Tensor & zero_points, ScalarType dtype); // {"schema": "aten::quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]", "dispatch": "True", "default": "False"} +Tensor quantize_per_channel(const Tensor & self, const Tensor & scales, const Tensor & zero_points, int64_t axis, ScalarType dtype); // {"schema": "aten::quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +Tensor dequantize(const Tensor & self); // {"schema": "aten::dequantize.self(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector dequantize(TensorList tensors); // {"schema": "aten::dequantize.tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "True", "default": "False"} +double q_scale(const Tensor & self); // {"schema": "aten::q_scale(Tensor self) -> float", "dispatch": "True", "default": "False"} +int64_t q_zero_point(const Tensor & self); // {"schema": "aten::q_zero_point(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor q_per_channel_scales(const Tensor & self); // {"schema": "aten::q_per_channel_scales(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor q_per_channel_zero_points(const Tensor & self); // {"schema": "aten::q_per_channel_zero_points(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +int64_t q_per_channel_axis(const Tensor & self); // {"schema": "aten::q_per_channel_axis(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor 
int_repr(const Tensor & self); // {"schema": "aten::int_repr(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _make_per_tensor_quantized_tensor(const Tensor & self, double scale, int64_t zero_point); // {"schema": "aten::_make_per_tensor_quantized_tensor(Tensor self, float scale, int zero_point) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _make_per_channel_quantized_tensor(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis); // {"schema": "aten::_make_per_channel_quantized_tensor(Tensor self, Tensor scale, Tensor zero_point, int axis) -> Tensor", "dispatch": "True", "default": "False"} +QScheme qscheme(const Tensor & self); // {"schema": "aten::qscheme(Tensor self) -> QScheme", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_tensor_affine(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fake_quantize_per_tensor_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine.tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple fake_quantize_per_tensor_affine_cachemask(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_per_tensor_affine_cachemask_tensor_qparams(const Tensor & self, const Tensor & scale, const Tensor & zero_point, const Tensor & fake_quant_enabled, int64_t quant_min, int64_t quant_max); // {"schema": "aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_tensor_affine_cachemask_backward(const Tensor & grad, const Tensor & mask); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _fake_quantize_learnable_per_tensor_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_learnable_per_tensor_affine_backward(const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_channel_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, 
int64_t quant_max); // {"schema": "aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple fake_quantize_per_channel_affine_cachemask(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_channel_affine_cachemask_backward(const Tensor & grad, const Tensor & mask); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _fake_quantize_learnable_per_channel_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_learnable_per_channel_affine_backward(const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor fused_moving_avg_obs_fake_quant(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::fused_moving_avg_obs_fake_quant(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _fused_moving_avg_obs_fq_helper(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::_fused_moving_avg_obs_fq_helper(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) 
zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +::std::tuple _choose_qparams_per_tensor(const Tensor & self, bool reduce_range); // {"schema": "aten::_choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)", "dispatch": "False", "default": "True"} +Tensor _saturate_weight_to_fp16(const Tensor & weight); // {"schema": "aten::_saturate_weight_to_fp16(Tensor weight) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple choose_qparams_optimized(const Tensor & input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width); // {"schema": "aten::choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor _autocast_to_reduced_precision(const Tensor & self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype); // {"schema": "aten::_autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _autocast_to_full_precision(const Tensor & self, bool cuda_enabled, bool cpu_enabled); // {"schema": "aten::_autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _to_copy(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); // {"schema": "aten::_to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor to(const Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, bool copy, ::std::optional memory_format); // {"schema": "aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, Device device, ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format); // {"schema": "aten::to.device(Tensor(a) self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format); // {"schema": "aten::to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, const Tensor & other, bool non_blocking, bool copy, ::std::optional memory_format); // {"schema": "aten::to.other(Tensor(a) self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? 
memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +::std::vector meshgrid(TensorList tensors); // {"schema": "aten::meshgrid(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector meshgrid(TensorList tensors, c10::string_view indexing); // {"schema": "aten::meshgrid.indexing(Tensor[] tensors, *, str indexing) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor cartesian_prod(TensorList tensors); // {"schema": "aten::cartesian_prod(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor combinations(const Tensor & self, int64_t r, bool with_replacement); // {"schema": "aten::combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor", "dispatch": "False", "default": "True"} +Scalar item(const Tensor & self); // {"schema": "aten::item(Tensor self) -> Scalar", "dispatch": "False", "default": "True"} +ScalarType result_type(const Tensor & tensor, const Tensor & other); // {"schema": "aten::result_type.Tensor(Tensor tensor, Tensor other) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Tensor & tensor, const Scalar & other); // {"schema": "aten::result_type.Scalar(Tensor tensor, Scalar other) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Scalar & scalar, const Tensor & tensor); // {"schema": "aten::result_type.Scalar_Tensor(Scalar scalar, Tensor tensor) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Scalar & scalar1, const Scalar & scalar2); // {"schema": "aten::result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType", "dispatch": "False", "default": "True"} +bool can_cast(ScalarType from_, ScalarType to); // {"schema": "aten::can_cast(ScalarType from_, ScalarType to) -> bool", "dispatch": "False", "default": "True"} +ScalarType promote_types(ScalarType type1, ScalarType type2); // {"schema": "aten::promote_types(ScalarType type1, ScalarType type2) -> ScalarType", "dispatch": "False", "default": "True"} +Scalar _local_scalar_dense(const Tensor & self); // {"schema": "aten::_local_scalar_dense(Tensor self) -> Scalar", "dispatch": "True", "default": "False"} +::std::tuple _lstm_mps(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::_lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple,::std::vector> lstm_mps_backward(const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & z_state, const Tensor & cell_state_fwd, const Tensor & input, const Tensor & layersOutputs, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? 
grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); // {"schema": "aten::_thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell_backward_impl(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _thnn_differentiable_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & input_gates, const Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const Tensor & cx, const Tensor & cy); // {"schema": "aten::_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _thnn_fused_gru_cell(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); // {"schema": "aten::_thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_gru_cell_backward(const Tensor & grad_hy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_differentiable_gru_cell_backward(const Tensor & grad_hy, const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); // {"schema": "aten::_thnn_differentiable_gru_cell_backward(Tensor grad_hy, Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias, Tensor? 
hidden_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm(const Tensor & data, const Tensor & batch_sizes, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple gru(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple gru(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_tanh(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::rnn_tanh.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_tanh(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::rnn_tanh.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_relu(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_relu(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm_cell(const Tensor & 
input, TensorList hx, const Tensor & w_ih, const Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); // {"schema": "aten::lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor gru_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); // {"schema": "aten::gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rnn_tanh_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); // {"schema": "aten::rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rnn_relu_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const ::std::optional & b_ih, const ::std::optional & b_hh); // {"schema": "aten::rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple quantized_lstm_cell(const Tensor & input, TensorList hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor quantized_gru_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantized_rnn_relu_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantized_rnn_tanh_cell(const Tensor & input, const Tensor & hx, const Tensor 
& w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _pack_padded_sequence(const Tensor & input, const Tensor & lengths, bool batch_first); // {"schema": "aten::_pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor _pack_padded_sequence_backward(const Tensor & grad, c10::SymIntArrayRef input_size, const Tensor & batch_sizes, bool batch_first); // {"schema": "aten::_pack_padded_sequence_backward(Tensor grad, SymInt[] input_size, Tensor batch_sizes, bool batch_first) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _pad_packed_sequence(const Tensor & data, const Tensor & batch_sizes, bool batch_first, const Scalar & padding_value, int64_t total_length); // {"schema": "aten::_pad_packed_sequence(Tensor data, Tensor batch_sizes, bool batch_first, Scalar padding_value, int total_length) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor & set_(Tensor & self, Storage source); // {"schema": "aten::set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & set_(Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & set_(Tensor & self, const Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & set_(Tensor & self, const Tensor & source); // {"schema": "aten::set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & set_(Tensor & self); // {"schema": "aten::set_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lift(const Tensor & self); // {"schema": "aten::lift(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor lift_fresh(const Tensor & self); // {"schema": "aten::lift_fresh(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor lift_fresh_copy(const Tensor & self); // {"schema": "aten::lift_fresh_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +bool is_set_to(const Tensor & self, const Tensor & tensor); // {"schema": "aten::is_set_to(Tensor self, Tensor tensor) -> bool", "dispatch": "True", "default": "False"} +Tensor & masked_fill_(Tensor & self, const Tensor & mask, const Scalar & value); // {"schema": "aten::masked_fill_.Scalar(Tensor(a!) 
self, Tensor mask, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor masked_fill(const Tensor & self, const Tensor & mask, const Scalar & value); // {"schema": "aten::masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & masked_fill_(Tensor & self, const Tensor & mask, const Tensor & value); // {"schema": "aten::masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor masked_fill(const Tensor & self, const Tensor & mask, const Tensor & value); // {"schema": "aten::masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & masked_scatter_(Tensor & self, const Tensor & mask, const Tensor & source); // {"schema": "aten::masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor masked_scatter(const Tensor & self, const Tensor & mask, const Tensor & source); // {"schema": "aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor masked_scatter_backward(const Tensor & grad_output, const Tensor & mask, c10::SymIntArrayRef sizes); // {"schema": "aten::masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _masked_softmax(const Tensor & self, const Tensor & mask, ::std::optional dim, ::std::optional mask_type); // {"schema": "aten::_masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _masked_softmax_backward(const Tensor & grad_output, const Tensor & output, const Tensor & mask, ::std::optional dim); // {"schema": "aten::_masked_softmax_backward(Tensor grad_output, Tensor output, Tensor mask, int? dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor view(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor view(const Tensor & self, ScalarType dtype); // {"schema": "aten::view.dtype(Tensor(a) self, ScalarType dtype) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & put_(Tensor & self, const Tensor & index, const Tensor & source, bool accumulate); // {"schema": "aten::put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor put(const Tensor & self, const Tensor & index, const Tensor & source, bool accumulate); // {"schema": "aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_add_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha, Tensor & out); // {"schema": "aten::index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & index_add_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add_(Tensor(a!) 
self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_add(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor index_add(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & index_reduce_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self, Tensor & out); // {"schema": "aten::index_reduce.out(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & index_reduce_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self); // {"schema": "aten::index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_reduce(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self); // {"schema": "aten::index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_fill(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_fill(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & index_fill_(Tensor & self, Dimname dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill_.Dimname_Tensor(Tensor(a!) 
self, Dimname dim, Tensor index, Tensor value) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor index_fill(const Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill.Dimname_Scalar(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor index_fill(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, Tensor & out); // {"schema": "aten::scatter.src_out(Tensor self, int dim, Tensor index, Tensor src, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, Tensor & out); // {"schema": "aten::scatter.value_out(Tensor self, int dim, Tensor index, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce); // {"schema": "aten::scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce); // {"schema": "aten::scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, Tensor & out); // {"schema": "aten::scatter.reduce_out(Tensor self, int dim, Tensor index, Tensor src, *, str reduce, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce); // {"schema": "aten::scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce); // {"schema": "aten::scatter_.value_reduce(Tensor(a!) 
self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce, Tensor & out); // {"schema": "aten::scatter.value_reduce_out(Tensor self, int dim, Tensor index, Scalar value, *, str reduce, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter(const Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter.dimname_value(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter_add(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_add_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_add_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, Tensor & out); // {"schema": "aten::scatter_add.out(Tensor self, int dim, Tensor index, Tensor src, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter_add(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter_reduce(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self); // {"schema": "aten::scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_reduce_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self); // {"schema": "aten::scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_reduce_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self, Tensor & out); // {"schema": "aten::scatter_reduce.two_out(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & eq_(Tensor & self, const Scalar & other); // {"schema": "aten::eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & eq_(Tensor & self, const Tensor & other); // {"schema": "aten::eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_and_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __and__(const Tensor & self, const Scalar & other); // {"schema": "aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __and__(const Tensor & self, const Tensor & other); // {"schema": "aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __iand__(Tensor & self, const Scalar & other); // {"schema": "aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __iand__(Tensor & self, const Tensor & other); // {"schema": "aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & bitwise_or_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_or.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_or_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_or_.Tensor(Tensor(a!) 
self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __or__(const Tensor & self, const Scalar & other); // {"schema": "aten::__or__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __or__(const Tensor & self, const Tensor & other); // {"schema": "aten::__or__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __ior__(Tensor & self, const Scalar & other); // {"schema": "aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __ior__(Tensor & self, const Tensor & other); // {"schema": "aten::__ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & bitwise_xor_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_xor_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __xor__(const Tensor & self, const Scalar & other); // {"schema": "aten::__xor__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __xor__(const Tensor & self, const Tensor & other); // {"schema": "aten::__xor__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __ixor__(Tensor & self, const Scalar & other); // {"schema": "aten::__ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __ixor__(Tensor & self, const Tensor & other); // {"schema": "aten::__ixor__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor __lshift__(const Tensor & self, const Scalar & other); // {"schema": "aten::__lshift__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor __lshift__(const Tensor & self, const Tensor & other); // {"schema": "aten::__lshift__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & __ilshift__(Tensor & self, const Scalar & other); // {"schema": "aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & __ilshift__(Tensor & self, const Tensor & other); // {"schema": "aten::__ilshift__.Tensor(Tensor(a!) 
self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_left_shift(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_left_shift(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_left_shift(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor __rshift__(const Tensor & self, const Scalar & other); // {"schema": "aten::__rshift__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor __rshift__(const Tensor & self, const Tensor & other); // {"schema": "aten::__rshift__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & __irshift__(Tensor & self, const Scalar & other); // {"schema": "aten::__irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & __irshift__(Tensor & self, const Tensor & other); // {"schema": "aten::__irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_right_shift(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_right_shift(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_right_shift_.Tensor_Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_right_shift(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tril_(Tensor & self, int64_t diagonal); // {"schema": "aten::tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & triu_(Tensor & self, int64_t diagonal); // {"schema": "aten::triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & digamma_(Tensor & self); // {"schema": "aten::digamma_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lerp_(Tensor & self, const Tensor & end, const Scalar & weight); // {"schema": "aten::lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lerp_(Tensor & self, const Tensor & end, const Tensor & weight); // {"schema": "aten::lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & addbmm_(Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & addbmm_out(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addbmm(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & random_(Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); // {"schema": "aten::random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & random_(Tensor & self, int64_t to, ::std::optional generator); // {"schema": "aten::random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & random_(Tensor & self, ::std::optional generator); // {"schema": "aten::random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & uniform_(Tensor & self, double from, double to, ::std::optional generator); // {"schema": "aten::uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & cauchy_(Tensor & self, double median, double sigma, ::std::optional generator); // {"schema": "aten::cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? 
generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & log_normal_(Tensor & self, double mean, double std, ::std::optional generator); // {"schema": "aten::log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & exponential_(Tensor & self, double lambd, ::std::optional generator); // {"schema": "aten::exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & geometric_(Tensor & self, double p, ::std::optional generator); // {"schema": "aten::geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & diag_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor diag(const Tensor & self, int64_t diagonal); // {"schema": "aten::diag(Tensor self, int diagonal=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cross_out(const Tensor & self, const Tensor & other, ::std::optional dim, Tensor & out); // {"schema": "aten::cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor cross(const Tensor & self, const Tensor & other, ::std::optional dim); // {"schema": "aten::cross(Tensor self, Tensor other, int? dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & triu_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor triu(const Tensor & self, int64_t diagonal); // {"schema": "aten::triu(Tensor self, int diagonal=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tril_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tril(const Tensor & self, int64_t diagonal); // {"schema": "aten::tril(Tensor self, int diagonal=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor tril_indices(int64_t row, int64_t col, int64_t offset, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor triu_indices(int64_t row, int64_t col, int64_t offset, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor trace(const Tensor & self); // {"schema": "aten::trace(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor trace_backward(const Tensor & grad, c10::SymIntArrayRef sizes); // {"schema": "aten::trace_backward(Tensor grad, SymInt[] sizes) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & ne_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::ne.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ne(const Tensor & self, const Scalar & other); // {"schema": "aten::ne.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ne_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ne(const Tensor & self, const Tensor & other); // {"schema": "aten::ne.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ne_(Tensor & self, const Scalar & other); // {"schema": "aten::ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ne_(Tensor & self, const Tensor & other); // {"schema": "aten::ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & not_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::not_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor not_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::not_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & not_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::not_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor not_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::not_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & not_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::not_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & not_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::not_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & eq_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor eq(const Tensor & self, const Scalar & other); // {"schema": "aten::eq.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & eq_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor eq(const Tensor & self, const Tensor & other); // {"schema": "aten::eq.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ge(const Tensor & self, const Scalar & other); // {"schema": "aten::ge.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ge(const Tensor & self, const Tensor & other); // {"schema": "aten::ge.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_(Tensor & self, const Scalar & other); // {"schema": "aten::ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ge_(Tensor & self, const Tensor & other); // {"schema": "aten::ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & greater_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::greater_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::greater_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::greater_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::greater_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::greater_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & greater_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::greater_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & le_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor le(const Tensor & self, const Scalar & other); // {"schema": "aten::le.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & le_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor le(const Tensor & self, const Tensor & other); // {"schema": "aten::le.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & le_(Tensor & self, const Scalar & other); // {"schema": "aten::le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & le_(Tensor & self, const Tensor & other); // {"schema": "aten::le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & less_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::less_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::less_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::less_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::less_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & less_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & gt_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gt(const Tensor & self, const Scalar & other); // {"schema": "aten::gt.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gt_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gt(const Tensor & self, const Tensor & other); // {"schema": "aten::gt.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gt_(Tensor & self, const Scalar & other); // {"schema": "aten::gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & gt_(Tensor & self, const Tensor & other); // {"schema": "aten::gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & greater_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::greater.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater(const Tensor & self, const Scalar & other); // {"schema": "aten::greater.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::greater.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater(const Tensor & self, const Tensor & other); // {"schema": "aten::greater.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_(Tensor & self, const Scalar & other); // {"schema": "aten::greater_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & greater_(Tensor & self, const Tensor & other); // {"schema": "aten::greater_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & lt_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lt(const Tensor & self, const Scalar & other); // {"schema": "aten::lt.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lt_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lt(const Tensor & self, const Tensor & other); // {"schema": "aten::lt.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lt_(Tensor & self, const Scalar & other); // {"schema": "aten::lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lt_(Tensor & self, const Tensor & other); // {"schema": "aten::lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & less_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less(const Tensor & self, const Scalar & other); // {"schema": "aten::less.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::less.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less(const Tensor & self, const Tensor & other); // {"schema": "aten::less.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_(Tensor & self, const Scalar & other); // {"schema": "aten::less_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & less_(Tensor & self, const Tensor & other); // {"schema": "aten::less_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & take_out(const Tensor & self, const Tensor & index, Tensor & out); // {"schema": "aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor take(const Tensor & self, const Tensor & index); // {"schema": "aten::take(Tensor self, Tensor index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & take_along_dim_out(const Tensor & self, const Tensor & indices, ::std::optional dim, Tensor & out); // {"schema": "aten::take_along_dim.out(Tensor self, Tensor indices, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor take_along_dim(const Tensor & self, const Tensor & indices, ::std::optional dim); // {"schema": "aten::take_along_dim(Tensor self, Tensor indices, int? dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & index_select_out(const Tensor & self, int64_t dim, const Tensor & index, Tensor & out); // {"schema": "aten::index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_select(const Tensor & self, int64_t dim, const Tensor & index); // {"schema": "aten::index_select(Tensor self, int dim, Tensor index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & index_select_out(const Tensor & self, Dimname dim, const Tensor & index, Tensor & out); // {"schema": "aten::index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor index_select(const Tensor & self, Dimname dim, const Tensor & index); // {"schema": "aten::index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor", "dispatch": "False", "default": "True"} +Tensor index_select_backward(const Tensor & grad, c10::SymIntArrayRef self_sizes, int64_t dim, const Tensor & index); // {"schema": "aten::index_select_backward(Tensor grad, SymInt[] self_sizes, int dim, Tensor index) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & masked_select_out(const Tensor & self, const Tensor & mask, Tensor & out); // {"schema": "aten::masked_select.out(Tensor self, Tensor mask, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor masked_select(const Tensor & self, const Tensor & mask); // {"schema": "aten::masked_select(Tensor self, Tensor mask) -> Tensor", "dispatch": "True", "default": "False"} +Tensor masked_select_backward(const Tensor & grad, const Tensor & input, const Tensor & mask); // {"schema": "aten::masked_select_backward(Tensor grad, Tensor input, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nonzero_out(const Tensor & self, Tensor & out); // {"schema": "aten::nonzero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nonzero(const Tensor & self); // {"schema": "aten::nonzero(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & nonzero_static_out(const Tensor & self, int64_t size, int64_t fill_value, Tensor & out); // {"schema": "aten::nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nonzero_static(const Tensor & self, int64_t size, int64_t fill_value); // {"schema": "aten::nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector nonzero_numpy(const Tensor & self); // {"schema": "aten::nonzero_numpy(Tensor self) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor argwhere(const Tensor & self); // {"schema": "aten::argwhere(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & gather_out(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad, Tensor & out); // {"schema": "aten::gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gather(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor gather_backward(const Tensor & grad, const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather_backward(Tensor grad, Tensor self, int dim, Tensor index, bool sparse_grad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & gather_out(const Tensor & self, Dimname dim, const Tensor & index, bool sparse_grad, Tensor & out); // {"schema": "aten::gather.dimname_out(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor gather(const Tensor & self, Dimname dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather.dimname(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _gather_sparse_backward(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & grad); // {"schema": "aten::_gather_sparse_backward(Tensor self, int dim, Tensor index, Tensor grad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & addcmul_out(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value, Tensor & out); // {"schema": "aten::addcmul.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addcmul(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addcmul_(Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & addcdiv_out(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value, Tensor & out); // {"schema": "aten::addcdiv.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addcdiv(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addcdiv_(Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor cross_entropy_loss(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, double label_smoothing); // {"schema": "aten::cross_entropy_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, float label_smoothing=0.0) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple triangular_solve_out(const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular, Tensor & X, Tensor & M); // {"schema": "aten::triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) 
cloned_coefficient)", "dispatch": "True", "default": "False"} +::std::tuple triangular_solve(const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular); // {"schema": "aten::triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)", "dispatch": "True", "default": "True"} +void _linalg_check_errors(const Tensor & info, c10::string_view api_name, bool is_matrix); // {"schema": "aten::_linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> ()", "dispatch": "True", "default": "True"} +Tensor & linalg_solve_triangular_out(const Tensor & self, const Tensor & B, bool upper, bool left, bool unitriangular, Tensor & out); // {"schema": "aten::linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor linalg_solve_triangular(const Tensor & self, const Tensor & B, bool upper, bool left, bool unitriangular); // {"schema": "aten::linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor linalg_vander(const Tensor & x, ::std::optional N); // {"schema": "aten::linalg_vander(Tensor x, *, SymInt? N=None) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple svd_out(const Tensor & self, bool some, bool compute_uv, Tensor & U, Tensor & S, Tensor & V); // {"schema": "aten::svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)", "dispatch": "False", "default": "True"} +::std::tuple svd(const Tensor & self, bool some, bool compute_uv); // {"schema": "aten::svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)", "dispatch": "False", "default": "True"} +Tensor swapaxes(const Tensor & self, int64_t axis0, int64_t axis1); // {"schema": "aten::swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor & swapaxes_(Tensor & self, int64_t axis0, int64_t axis1); // {"schema": "aten::swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor swapdims(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor & swapdims_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & cholesky_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cholesky(const Tensor & self, bool upper); // {"schema": "aten::cholesky(Tensor self, bool upper=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & cholesky_solve_out(const Tensor & self, const Tensor & input2, bool upper, Tensor & out); // {"schema": "aten::cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor cholesky_solve(const Tensor & self, const Tensor & input2, bool upper); // {"schema": "aten::cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _cholesky_solve_helper(const Tensor & self, const Tensor & A, bool upper); // {"schema": "aten::_cholesky_solve_helper(Tensor self, Tensor A, bool upper) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cholesky_inverse(const Tensor & self, bool upper); // {"schema": "aten::cholesky_inverse(Tensor self, bool upper=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & cholesky_inverse_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::cholesky_inverse.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple qr_out(const Tensor & self, bool some, Tensor & Q, Tensor & R); // {"schema": "aten::qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)", "dispatch": "False", "default": "True"} +::std::tuple qr(const Tensor & self, bool some); // {"schema": "aten::qr(Tensor self, bool some=True) -> (Tensor Q, Tensor R)", "dispatch": "False", "default": "True"} +::std::tuple geqrf_out(const Tensor & self, Tensor & a, Tensor & tau); // {"schema": "aten::geqrf.a(Tensor self, *, Tensor(a!) a, Tensor(b!) tau) -> (Tensor(a!) a, Tensor(b!) tau)", "dispatch": "True", "default": "False"} +::std::tuple geqrf(const Tensor & self); // {"schema": "aten::geqrf(Tensor self) -> (Tensor a, Tensor tau)", "dispatch": "True", "default": "False"} +Tensor orgqr(const Tensor & self, const Tensor & input2); // {"schema": "aten::orgqr(Tensor self, Tensor input2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & orgqr_out(const Tensor & self, const Tensor & input2, Tensor & out); // {"schema": "aten::orgqr.out(Tensor self, Tensor input2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & ormqr_out(const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose, Tensor & out); // {"schema": "aten::ormqr.out(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ormqr(const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose); // {"schema": "aten::ormqr(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _lu_with_info(const Tensor & self, bool pivot, bool check_errors); // {"schema": "aten::_lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info)", "dispatch": "False", "default": "True"} +Tensor & lu_solve_out(const Tensor & self, const Tensor & LU_data, const Tensor & LU_pivots, Tensor & out); // {"schema": "aten::lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor lu_solve(const Tensor & self, const Tensor & LU_data, const Tensor & LU_pivots); // {"schema": "aten::lu_solve(Tensor self, Tensor LU_data, Tensor LU_pivots) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple lu_unpack(const Tensor & LU_data, const Tensor & LU_pivots, bool unpack_data, bool unpack_pivots); // {"schema": "aten::lu_unpack(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True) -> (Tensor P, Tensor L, Tensor U)", "dispatch": "True", "default": "True"} +::std::tuple lu_unpack_out(const Tensor & LU_data, const Tensor & LU_pivots, bool unpack_data, bool unpack_pivots, Tensor & P, Tensor & L, Tensor & U); // {"schema": "aten::lu_unpack.out(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True, *, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)", "dispatch": "True", "default": "False"} +Tensor & multinomial_out(const Tensor & self, int64_t num_samples, bool replacement, ::std::optional generator, Tensor & out); // {"schema": "aten::multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multinomial(const Tensor & self, int64_t num_samples, bool replacement, ::std::optional generator); // {"schema": "aten::multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & lgamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::lgamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & lgamma_(Tensor & self); // {"schema": "aten::lgamma_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor lgamma(const Tensor & self); // {"schema": "aten::lgamma(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & digamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor digamma(const Tensor & self); // {"schema": "aten::digamma(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & polygamma_out(int64_t n, const Tensor & self, Tensor & out); // {"schema": "aten::polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor polygamma(int64_t n, const Tensor & self); // {"schema": "aten::polygamma(int n, Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & polygamma_(Tensor & self, int64_t n); // {"schema": "aten::polygamma_(Tensor(a!) self, int n) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor erfinv(const Tensor & self); // {"schema": "aten::erfinv(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & erfinv_(Tensor & self); // {"schema": "aten::erfinv_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & erfinv_out(const Tensor & self, Tensor & out); // {"schema": "aten::erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor i0(const Tensor & self); // {"schema": "aten::i0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & i0_(Tensor & self); // {"schema": "aten::i0_(Tensor(a!) 
self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sign(const Tensor & self); // {"schema": "aten::sign(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sign_(Tensor & self); // {"schema": "aten::sign_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sign_out(const Tensor & self, Tensor & out); // {"schema": "aten::sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor signbit(const Tensor & self); // {"schema": "aten::signbit(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & signbit_out(const Tensor & self, Tensor & out); // {"schema": "aten::signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor dist(const Tensor & self, const Tensor & other, const Scalar & p); // {"schema": "aten::dist(Tensor self, Tensor other, Scalar p=2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & atan2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & atan2_(Tensor & self, const Tensor & other); // {"schema": "aten::atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor atan2(const Tensor & self, const Tensor & other); // {"schema": "aten::atan2(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor arctan2(const Tensor & self, const Tensor & other); // {"schema": "aten::arctan2(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & arctan2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & arctan2_(Tensor & self, const Tensor & other); // {"schema": "aten::arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & lerp_out(const Tensor & self, const Tensor & end, const Scalar & weight, Tensor & out); // {"schema": "aten::lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & lerp_out(const Tensor & self, const Tensor & end, const Tensor & weight, Tensor & out); // {"schema": "aten::lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lerp(const Tensor & self, const Tensor & end, const Scalar & weight); // {"schema": "aten::lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor", "dispatch": "True", "default": "True"} +Tensor lerp(const Tensor & self, const Tensor & end, const Tensor & weight); // {"schema": "aten::lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & histc_out(const Tensor & self, int64_t bins, const Scalar & min, const Scalar & max, Tensor & out); // {"schema": "aten::histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor histc(const Tensor & self, int64_t bins, const Scalar & min, const Scalar & max); // {"schema": "aten::histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple histogram_out(const Tensor & self, const Tensor & bins, const ::std::optional & weight, bool density, Tensor & hist, Tensor & bin_edges); // {"schema": "aten::histogram.bins_tensor_out(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)", "dispatch": "True", "default": "False"} +::std::tuple histogram(const Tensor & self, const Tensor & bins, const ::std::optional & weight, bool density); // {"schema": "aten::histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)", "dispatch": "True", "default": "False"} +::std::tuple histogram_out(const Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density, Tensor & hist, Tensor & bin_edges); // {"schema": "aten::histogram.bin_ct_out(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)", "dispatch": "True", "default": "False"} +::std::tuple histogram(const Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)", "dispatch": "True", "default": "False"} +::std::vector _histogramdd_bin_edges(const Tensor & self, IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::_histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]", "dispatch": "True", "default": "False"} +Tensor _histogramdd_from_bin_cts(const Tensor & self, IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::_histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _histogramdd_from_bin_tensors(const Tensor & self, TensorList bins, const ::std::optional & weight, bool density); // {"schema": "aten::_histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple> histogramdd(const Tensor & self, IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"} +::std::tuple> histogramdd(const Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? 
weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"} +::std::tuple> histogramdd(const Tensor & self, TensorList bins, ::std::optional> range, const ::std::optional & weight, bool density); // {"schema": "aten::histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"} +Tensor & fmod_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor fmod(const Tensor & self, const Scalar & other); // {"schema": "aten::fmod.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fmod_(Tensor & self, const Scalar & other); // {"schema": "aten::fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & fmod_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmod.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fmod(const Tensor & self, const Tensor & other); // {"schema": "aten::fmod.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fmod_(Tensor & self, const Tensor & other); // {"schema": "aten::fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hypot_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::hypot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hypot(const Tensor & self, const Tensor & other); // {"schema": "aten::hypot(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hypot_(Tensor & self, const Tensor & other); // {"schema": "aten::hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & igamma_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::igamma.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor igamma(const Tensor & self, const Tensor & other); // {"schema": "aten::igamma(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & igamma_(Tensor & self, const Tensor & other); // {"schema": "aten::igamma_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & igammac_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::igammac.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor igammac(const Tensor & self, const Tensor & other); // {"schema": "aten::igammac(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & igammac_(Tensor & self, const Tensor & other); // {"schema": "aten::igammac_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & nextafter_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::nextafter.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nextafter(const Tensor & self, const Tensor & other); // {"schema": "aten::nextafter(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & nextafter_(Tensor & self, const Tensor & other); // {"schema": "aten::nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & remainder_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor remainder(const Tensor & self, const Scalar & other); // {"schema": "aten::remainder.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & remainder_(Tensor & self, const Scalar & other); // {"schema": "aten::remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & remainder_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::remainder.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor remainder(const Tensor & self, const Tensor & other); // {"schema": "aten::remainder.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & remainder_(Tensor & self, const Tensor & other); // {"schema": "aten::remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor remainder(const Scalar & self, const Tensor & other); // {"schema": "aten::remainder.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor min(const Tensor & self); // {"schema": "aten::min(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & min_out(const Tensor & self, Tensor & out); // {"schema": "aten::min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fmin(const Tensor & self, const Tensor & other); // {"schema": "aten::fmin(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fmin_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max(const Tensor & self); // {"schema": "aten::max(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor fmax(const Tensor & self, const Tensor & other); // {"schema": "aten::fmax(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fmax_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmax.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor maximum(const Tensor & self, const Tensor & other); // {"schema": "aten::maximum(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & maximum_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::maximum.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max(const Tensor & self, const Tensor & other); // {"schema": "aten::max.other(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & max_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::max.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & max_out(const Tensor & self, Tensor & out); // {"schema": "aten::max.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor minimum(const Tensor & self, const Tensor & other); // {"schema": "aten::minimum(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & minimum_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::minimum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & min_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::min.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor min(const Tensor & self, const Tensor & other); // {"schema": "aten::min.other(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantile(const Tensor & self, const Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"} +Tensor & quantile_out(const Tensor & self, const Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor quantile(const Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"} +Tensor & quantile_out(const Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nanquantile(const Tensor & self, const Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nanquantile_out(const Tensor & self, const Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nanquantile(const Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::nanquantile.scalar(Tensor self, float q, int? 
dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nanquantile_out(const Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple sort_out(const Tensor & self, int64_t dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"} +::std::tuple sort_out(const Tensor & self, ::std::optional stable, int64_t dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.values_stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple sort(const Tensor & self, int64_t dim, bool descending); // {"schema": "aten::sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple sort(const Tensor & self, ::std::optional stable, int64_t dim, bool descending); // {"schema": "aten::sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple sort_out(const Tensor & self, Dimname dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.dimname_values(Tensor self, Dimname dim, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +::std::tuple sort_out(const Tensor & self, ::std::optional stable, Dimname dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.dimname_values_stable(Tensor self, *, bool? stable, Dimname dim, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +::std::tuple sort(const Tensor & self, Dimname dim, bool descending); // {"schema": "aten::sort.dimname(Tensor self, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple sort(const Tensor & self, ::std::optional stable, Dimname dim, bool descending); // {"schema": "aten::sort.dimname_stable(Tensor self, *, bool? stable, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +Tensor & msort_out(const Tensor & self, Tensor & out); // {"schema": "aten::msort.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor msort(const Tensor & self); // {"schema": "aten::msort(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor argsort(const Tensor & self, int64_t dim, bool descending); // {"schema": "aten::argsort(Tensor self, int dim=-1, bool descending=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor argsort(const Tensor & self, bool stable, int64_t dim, bool descending); // {"schema": "aten::argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & argsort_out(const Tensor & self, bool stable, int64_t dim, bool descending, Tensor & out); // {"schema": "aten::argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor argsort(const Tensor & self, Dimname dim, bool descending); // {"schema": "aten::argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple topk_out(const Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted, Tensor & values, Tensor & indices); // {"schema": "aten::topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple topk(const Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted); // {"schema": "aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +Tensor all(const Tensor & self); // {"schema": "aten::all(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & all_out(const Tensor & self, Tensor & out); // {"schema": "aten::all.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor any(const Tensor & self); // {"schema": "aten::any(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & any_out(const Tensor & self, Tensor & out); // {"schema": "aten::any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & renorm_out(const Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm, Tensor & out); // {"schema": "aten::renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor renorm(const Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm); // {"schema": "aten::renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & renorm_(Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm); // {"schema": "aten::renorm_(Tensor(a!) 
self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor unfold(const Tensor & self, int64_t dimension, int64_t size, int64_t step); // {"schema": "aten::unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor unfold_backward(const Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step); // {"schema": "aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor", "dispatch": "True", "default": "False"} +bool equal(const Tensor & self, const Tensor & other); // {"schema": "aten::equal(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "False"} +Tensor & pow_out(const Tensor & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor pow(const Tensor & self, const Tensor & exponent); // {"schema": "aten::pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & pow_out(const Scalar & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::pow.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor pow(const Scalar & self, const Tensor & exponent); // {"schema": "aten::pow.Scalar(Scalar self, Tensor exponent) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & pow_out(const Tensor & self, const Scalar & exponent, Tensor & out); // {"schema": "aten::pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor pow(const Tensor & self, const Scalar & exponent); // {"schema": "aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & pow_(Tensor & self, const Scalar & exponent); // {"schema": "aten::pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & pow_(Tensor & self, const Tensor & exponent); // {"schema": "aten::pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & float_power_out(const Tensor & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::float_power.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor float_power(const Tensor & self, const Tensor & exponent); // {"schema": "aten::float_power.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & float_power_out(const Scalar & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::float_power.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor float_power(const Scalar & self, const Tensor & exponent); // {"schema": "aten::float_power.Scalar(Scalar self, Tensor exponent) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & float_power_out(const Tensor & self, const Scalar & exponent, Tensor & out); // {"schema": "aten::float_power.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor float_power(const Tensor & self, const Scalar & exponent); // {"schema": "aten::float_power.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & float_power_(Tensor & self, const Scalar & exponent); // {"schema": "aten::float_power_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & float_power_(Tensor & self, const Tensor & exponent); // {"schema": "aten::float_power_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & normal_(Tensor & self, double mean, double std, ::std::optional generator); // {"schema": "aten::normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor normal_functional(const Tensor & self, double mean, double std, ::std::optional generator); // {"schema": "aten::normal_functional(Tensor self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & normal_out(const Tensor & mean, double std, ::std::optional generator, Tensor & out); // {"schema": "aten::normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor normal(const Tensor & mean, double std, ::std::optional generator); // {"schema": "aten::normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & normal_out(double mean, const Tensor & std, ::std::optional generator, Tensor & out); // {"schema": "aten::normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor normal(double mean, const Tensor & std, ::std::optional generator); // {"schema": "aten::normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & normal_out(const Tensor & mean, const Tensor & std, ::std::optional generator, Tensor & out); // {"schema": "aten::normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor normal(const Tensor & mean, const Tensor & std, ::std::optional generator); // {"schema": "aten::normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor normal(double mean, double std, c10::SymIntArrayRef size, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::normal.float_float(float mean, float std, SymInt[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & normal_out(double mean, double std, c10::SymIntArrayRef size, ::std::optional generator, Tensor & out); // {"schema": "aten::normal.float_float_out(float mean, float std, SymInt[] size, *, Generator? generator=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor alias(const Tensor & self); // {"schema": "aten::alias(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +void _amp_foreach_non_finite_check_and_unscale_(TensorList self, Tensor & found_inf, const Tensor & inv_scale); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()", "dispatch": "True", "default": "False"} +Tensor & _amp_update_scale_(Tensor & self, Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); // {"schema": "aten::_amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector _foreach_add(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_add.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_add_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_add(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_add_(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_add(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_add_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_add(TensorList self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_add_(TensorList self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sub(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sub_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sub(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_sub.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sub_(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sub(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_sub.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", 
"default": "True"} +void _foreach_sub_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_mul(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_mul.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_mul_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_mul(TensorList self, TensorList other); // {"schema": "aten::_foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_mul_(TensorList self, TensorList other); // {"schema": "aten::_foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_mul(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_mul_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_mul(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_mul_(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_div(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_div_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_div(TensorList self, TensorList other); // {"schema": "aten::_foreach_div.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_div_(TensorList self, TensorList other); // {"schema": "aten::_foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_div(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_div_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_div(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_div_(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_max(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> 
()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_max(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_max.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_max(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_clamp_max.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_min(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_min.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_min(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_min.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_clamp_min(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_clamp_min.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_maximum(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_maximum_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_maximum(TensorList self, TensorList other); // {"schema": "aten::_foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_maximum_(TensorList self, TensorList other); // {"schema": "aten::_foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_maximum(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_maximum_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_minimum(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_minimum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_minimum_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> 
()", "dispatch": "True", "default": "True"} +::std::vector _foreach_minimum(TensorList self, TensorList other); // {"schema": "aten::_foreach_minimum.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_minimum_(TensorList self, TensorList other); // {"schema": "aten::_foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_minimum(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_minimum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_minimum_(TensorList self, ArrayRef scalars); // {"schema": "aten::_foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcdiv.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars); // {"schema": "aten::_foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars); // {"schema": "aten::_foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcdiv_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcmul.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars); // {"schema": "aten::_foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()", "dispatch": "True", 
"default": "True"} +void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars); // {"schema": "aten::_foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcmul_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_abs(TensorList self); // {"schema": "aten::_foreach_abs(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_abs_(TensorList self); // {"schema": "aten::_foreach_abs_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_acos(TensorList self); // {"schema": "aten::_foreach_acos(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_acos_(TensorList self); // {"schema": "aten::_foreach_acos_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_asin(TensorList self); // {"schema": "aten::_foreach_asin(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_asin_(TensorList self); // {"schema": "aten::_foreach_asin_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_atan(TensorList self); // {"schema": "aten::_foreach_atan(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_atan_(TensorList self); // {"schema": "aten::_foreach_atan_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_ceil(TensorList self); // {"schema": "aten::_foreach_ceil(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_ceil_(TensorList self); // {"schema": "aten::_foreach_ceil_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_cos(TensorList self); // {"schema": "aten::_foreach_cos(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_cos_(TensorList self); // {"schema": "aten::_foreach_cos_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_cosh(TensorList self); // {"schema": "aten::_foreach_cosh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_cosh_(TensorList self); // {"schema": "aten::_foreach_cosh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_erf(TensorList self); // {"schema": "aten::_foreach_erf(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_erf_(TensorList self); // {"schema": "aten::_foreach_erf_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_erfc(TensorList self); // {"schema": "aten::_foreach_erfc(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_erfc_(TensorList self); // {"schema": "aten::_foreach_erfc_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_exp(TensorList self); // {"schema": "aten::_foreach_exp(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_exp_(TensorList self); // {"schema": "aten::_foreach_exp_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_expm1(TensorList self); // {"schema": "aten::_foreach_expm1(Tensor[] self) -> 
Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_expm1_(TensorList self); // {"schema": "aten::_foreach_expm1_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_floor(TensorList self); // {"schema": "aten::_foreach_floor(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_floor_(TensorList self); // {"schema": "aten::_foreach_floor_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_frac(TensorList self); // {"schema": "aten::_foreach_frac(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_frac_(TensorList self); // {"schema": "aten::_foreach_frac_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_lerp(TensorList self, TensorList tensors1, TensorList weights); // {"schema": "aten::_foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_lerp_(TensorList self, TensorList tensors1, TensorList weights); // {"schema": "aten::_foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_lerp(TensorList self, TensorList tensors1, const Scalar & weight); // {"schema": "aten::_foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_lerp_(TensorList self, TensorList tensors1, const Scalar & weight); // {"schema": "aten::_foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_lgamma(TensorList self); // {"schema": "aten::_foreach_lgamma(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_lgamma_(TensorList self); // {"schema": "aten::_foreach_lgamma_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_log(TensorList self); // {"schema": "aten::_foreach_log(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_log_(TensorList self); // {"schema": "aten::_foreach_log_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_log10(TensorList self); // {"schema": "aten::_foreach_log10(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_log10_(TensorList self); // {"schema": "aten::_foreach_log10_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_log1p(TensorList self); // {"schema": "aten::_foreach_log1p(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_log1p_(TensorList self); // {"schema": "aten::_foreach_log1p_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_log2(TensorList self); // {"schema": "aten::_foreach_log2(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_log2_(TensorList self); // {"schema": "aten::_foreach_log2_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_max(TensorList self); // {"schema": "aten::_foreach_max(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_neg(TensorList self); // {"schema": "aten::_foreach_neg(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_neg_(TensorList self); // {"schema": "aten::_foreach_neg_(Tensor(a!)[] self) -> 
()", "dispatch": "True", "default": "True"} +::std::vector _foreach_norm(TensorList self, const Scalar & ord, ::std::optional dtype); // {"schema": "aten::_foreach_norm.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_pow(TensorList self, TensorList exponent); // {"schema": "aten::_foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_pow(TensorList self, const Scalar & exponent); // {"schema": "aten::_foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_pow(TensorList self, ArrayRef exponent); // {"schema": "aten::_foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector _foreach_pow(const Scalar & self, TensorList exponent); // {"schema": "aten::_foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_pow_(TensorList self, TensorList exponent); // {"schema": "aten::_foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_(TensorList self, const Scalar & exponent); // {"schema": "aten::_foreach_pow_.Scalar(Tensor(a!)[] self, Scalar exponent) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_(TensorList self, ArrayRef exponent); // {"schema": "aten::_foreach_pow_.ScalarList(Tensor(a!)[] self, Scalar[] exponent) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_reciprocal(TensorList self); // {"schema": "aten::_foreach_reciprocal(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_reciprocal_(TensorList self); // {"schema": "aten::_foreach_reciprocal_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_round(TensorList self); // {"schema": "aten::_foreach_round(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_round_(TensorList self); // {"schema": "aten::_foreach_round_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sigmoid(TensorList self); // {"schema": "aten::_foreach_sigmoid(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sigmoid_(TensorList self); // {"schema": "aten::_foreach_sigmoid_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sign(TensorList self); // {"schema": "aten::_foreach_sign(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sign_(TensorList self); // {"schema": "aten::_foreach_sign_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sin(TensorList self); // {"schema": "aten::_foreach_sin(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sin_(TensorList self); // {"schema": "aten::_foreach_sin_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sinh(TensorList self); // {"schema": "aten::_foreach_sinh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_sinh_(TensorList self); // {"schema": "aten::_foreach_sinh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_sqrt(TensorList self); // {"schema": "aten::_foreach_sqrt(Tensor[] self) -> Tensor[]", "dispatch": "True", 
"default": "True"} +void _foreach_sqrt_(TensorList self); // {"schema": "aten::_foreach_sqrt_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_tan(TensorList self); // {"schema": "aten::_foreach_tan(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_tan_(TensorList self); // {"schema": "aten::_foreach_tan_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_tanh(TensorList self); // {"schema": "aten::_foreach_tanh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_tanh_(TensorList self); // {"schema": "aten::_foreach_tanh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_trunc(TensorList self); // {"schema": "aten::_foreach_trunc(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "True"} +void _foreach_trunc_(TensorList self); // {"schema": "aten::_foreach_trunc_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +void _foreach_zero_(TensorList self); // {"schema": "aten::_foreach_zero_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "True"} +void _foreach_copy_(TensorList self, TensorList src, bool non_blocking); // {"schema": "aten::_foreach_copy_(Tensor(a!)[] self, Tensor[] src, bool non_blocking=False) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_copy(TensorList self, TensorList src, bool non_blocking); // {"schema": "aten::_foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out", "dispatch": "True", "default": "True"} +Tensor bucketize(const Tensor & self, const Tensor & boundaries, bool out_int32, bool right); // {"schema": "aten::bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & bucketize_out(const Tensor & self, const Tensor & boundaries, bool out_int32, bool right, Tensor & out); // {"schema": "aten::bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bucketize(const Scalar & self, const Tensor & boundaries, bool out_int32, bool right); // {"schema": "aten::bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor searchsorted(const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right, ::std::optional side, const ::std::optional & sorter); // {"schema": "aten::searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & searchsorted_out(const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right, ::std::optional side, const ::std::optional & sorter, Tensor & out); // {"schema": "aten::searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor searchsorted(const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right, ::std::optional side, const ::std::optional & sorter); // {"schema": "aten::searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? 
sorter=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & searchsorted_out(const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right, ::std::optional side, const ::std::optional & sorter, Tensor & out); // {"schema": "aten::searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_indices_from_coo_to_csr(const Tensor & self, int64_t size, bool out_int32); // {"schema": "aten::_convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _convert_indices_from_coo_to_csr_out(const Tensor & self, int64_t size, bool out_int32, Tensor & out); // {"schema": "aten::_convert_indices_from_coo_to_csr.out(Tensor self, int size, *, bool out_int32=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_indices_from_csr_to_coo(const Tensor & crow_indices, const Tensor & col_indices, bool out_int32, bool transpose); // {"schema": "aten::_convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _convert_indices_from_csr_to_coo_out(const Tensor & crow_indices, const Tensor & col_indices, bool out_int32, bool transpose, Tensor & out); // {"schema": "aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & mse_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mse_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mse_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, Tensor & grad_input); // {"schema": "aten::mse_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mse_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor", "dispatch": "True", "default": "False"} +Tensor l1_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multi_margin_loss_out(const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const ::std::optional & weight, int64_t reduction, Tensor & out); // {"schema": "aten::multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multi_margin_loss(const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const ::std::optional & weight, int64_t reduction); // {"schema": "aten::multi_margin_loss(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & multi_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const ::std::optional & weight, int64_t reduction, Tensor & grad_input); // {"schema": "aten::multi_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Scalar p, Scalar margin, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multi_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const ::std::optional & weight, int64_t reduction); // {"schema": "aten::multi_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, Scalar p, Scalar margin, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & multilabel_margin_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::multilabel_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor multilabel_margin_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::multilabel_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple multilabel_margin_loss_forward_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & output, Tensor & is_target); // {"schema": "aten::multilabel_margin_loss_forward.output(Tensor self, Tensor target, int reduction, *, Tensor(a!) output, Tensor(b!) is_target) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple multilabel_margin_loss_forward(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)", "dispatch": "True", "default": "False"} +Tensor & multilabel_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target, Tensor & grad_input); // {"schema": "aten::multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multilabel_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target); // {"schema": "aten::multilabel_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & nll_loss_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & out); // {"schema": "aten::nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nll_loss_nd(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss_nd(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +Tensor nll_loss(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple nll_loss_forward_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & output, Tensor & total_weight); // {"schema": "aten::nll_loss_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, *, Tensor(a!) output, Tensor(b!) total_weight) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple nll_loss_forward(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)", "dispatch": "True", "default": "True"} +Tensor & nll_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight, Tensor & grad_input); // {"schema": "aten::nll_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nll_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight); // {"schema": "aten::nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & nll_loss2d_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & out); // {"schema": "aten::nll_loss2d.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nll_loss2d(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss2d(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple nll_loss2d_forward_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & output, Tensor & total_weight); // {"schema": "aten::nll_loss2d_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, *, Tensor(a!) output, Tensor(b!) 
total_weight) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple nll_loss2d_forward(const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)", "dispatch": "True", "default": "False"} +Tensor & nll_loss2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight, Tensor & grad_input); // {"schema": "aten::nll_loss2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nll_loss2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight); // {"schema": "aten::nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & smooth_l1_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, double beta, Tensor & out); // {"schema": "aten::smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor smooth_l1_loss(const Tensor & self, const Tensor & target, int64_t reduction, double beta); // {"schema": "aten::smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & smooth_l1_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double beta, Tensor & grad_input); // {"schema": "aten::smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor smooth_l1_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double beta); // {"schema": "aten::smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & huber_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, double delta, Tensor & out); // {"schema": "aten::huber_loss.out(Tensor self, Tensor target, int reduction=Mean, float delta=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor huber_loss(const Tensor & self, const Tensor & target, int64_t reduction, double delta); // {"schema": "aten::huber_loss(Tensor self, Tensor target, int reduction=Mean, float delta=1.0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & huber_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double delta, Tensor & grad_input); // {"schema": "aten::huber_loss_backward.out(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor huber_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double delta); // {"schema": "aten::huber_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & soft_margin_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor soft_margin_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::soft_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & soft_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, Tensor & grad_input); // {"schema": "aten::soft_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor soft_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::soft_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_out(const Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, Tensor & out); // {"schema": "aten::elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor elu(const Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale); // {"schema": "aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_backward_out(const Tensor & grad_output, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, bool is_result, const Tensor & self_or_result, Tensor & grad_input); // {"schema": "aten::elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor elu_backward(const Tensor & grad_output, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, bool is_result, const Tensor & self_or_result); // {"schema": "aten::elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_(Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale); // {"schema": "aten::elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::glu.out(Tensor self, int dim=-1, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor glu(const Tensor & self, int64_t dim); // {"schema": "aten::glu(Tensor self, int dim=-1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & glu_backward_out(const Tensor & grad_output, const Tensor & self, int64_t dim, Tensor & grad_input); // {"schema": "aten::glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor glu_backward(const Tensor & grad_output, const Tensor & self, int64_t dim); // {"schema": "aten::glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor glu_jvp(const Tensor & glu, const Tensor & x, const Tensor & dx, int64_t dim); // {"schema": "aten::glu_jvp(Tensor glu, Tensor x, Tensor dx, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor glu_backward_jvp(const Tensor & grad_x, const Tensor & grad_glu, const Tensor & x, const Tensor & dgrad_glu, const Tensor & dx, int64_t dim); // {"schema": "aten::glu_backward_jvp(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardsigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::hardsigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardsigmoid(const Tensor & self); // {"schema": "aten::hardsigmoid(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardsigmoid_(Tensor & self); // {"schema": "aten::hardsigmoid_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hardsigmoid_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::hardsigmoid_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardsigmoid_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardtanh_out(const Tensor & self, const Scalar & min_val, const Scalar & max_val, Tensor & out); // {"schema": "aten::hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardtanh(const Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardtanh_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & min_val, const Scalar & max_val, Tensor & grad_input); // {"schema": "aten::hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardtanh_backward(const Tensor & grad_output, const Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardtanh_(Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh_(Tensor(a!) 
self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & hardswish_out(const Tensor & self, Tensor & out); // {"schema": "aten::hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardswish(const Tensor & self); // {"schema": "aten::hardswish(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardswish_(Tensor & self); // {"schema": "aten::hardswish_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardswish_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::hardswish_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & leaky_relu_out(const Tensor & self, const Scalar & negative_slope, Tensor & out); // {"schema": "aten::leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor leaky_relu(const Tensor & self, const Scalar & negative_slope); // {"schema": "aten::leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & leaky_relu_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & negative_slope, bool self_is_result, Tensor & grad_input); // {"schema": "aten::leaky_relu_backward.grad_input(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor leaky_relu_backward(const Tensor & grad_output, const Tensor & self, const Scalar & negative_slope, bool self_is_result); // {"schema": "aten::leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & leaky_relu_(Tensor & self, const Scalar & negative_slope); // {"schema": "aten::leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log_sigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::log_sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor log_sigmoid(const Tensor & self); // {"schema": "aten::log_sigmoid(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple log_sigmoid_forward_out(const Tensor & self, Tensor & output, Tensor & buffer); // {"schema": "aten::log_sigmoid_forward.output(Tensor self, *, Tensor(a!) output, Tensor(b!) buffer) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple log_sigmoid_forward(const Tensor & self); // {"schema": "aten::log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)", "dispatch": "True", "default": "False"} +Tensor & log_sigmoid_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & buffer, Tensor & grad_input); // {"schema": "aten::log_sigmoid_backward.grad_input(Tensor grad_output, Tensor self, Tensor buffer, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer); // {"schema": "aten::log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & rrelu_with_noise_out(const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, ::std::optional generator, Tensor & out); // {"schema": "aten::rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor rrelu_with_noise(const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, ::std::optional generator); // {"schema": "aten::rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, bool self_is_result); // {"schema": "aten::rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rrelu_with_noise_(Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, ::std::optional generator); // {"schema": "aten::rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & softplus_out(const Tensor & self, const Scalar & beta, const Scalar & threshold, Tensor & out); // {"schema": "aten::softplus.out(Tensor self, Scalar beta=1, Scalar threshold=20, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softplus(const Tensor & self, const Scalar & beta, const Scalar & threshold); // {"schema": "aten::softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softplus_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & beta, const Scalar & threshold, Tensor & grad_input); // {"schema": "aten::softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softplus_backward(const Tensor & grad_output, const Tensor & self, const Scalar & beta, const Scalar & threshold); // {"schema": "aten::softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softshrink_out(const Tensor & self, const Scalar & lambd, Tensor & out); // {"schema": "aten::softshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softshrink(const Tensor & self, const Scalar & lambd); // {"schema": "aten::softshrink(Tensor self, Scalar lambd=0.5) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softshrink_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & lambd, Tensor & grad_input); // {"schema": "aten::softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softshrink_backward(const Tensor & grad_output, const Tensor & self, const Scalar & lambd); // {"schema": "aten::softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & adaptive_avg_pool2d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_avg_pool2d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor", "dispatch": "False", "default": "True"} +Tensor mkldnn_adaptive_avg_pool2d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & mkldnn_adaptive_avg_pool2d_out(const Tensor & self, IntArrayRef output_size, Tensor & out); // {"schema": "aten::mkldnn_adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mkldnn_adaptive_avg_pool2d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::mkldnn_adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool2d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::_adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool2d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::_adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & adaptive_avg_pool3d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_avg_pool3d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _adaptive_avg_pool3d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::_adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & adaptive_avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::adaptive_avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool3d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::_adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool2d_out(const Tensor & self, IntArrayRef output_size, Tensor & out, Tensor & indices); // {"schema": "aten::adaptive_max_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool2d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & adaptive_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::adaptive_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_max_pool2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & indices); // {"schema": "aten::adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple adaptive_max_pool3d_out(const Tensor & self, IntArrayRef output_size, Tensor & out, Tensor & indices); // {"schema": "aten::adaptive_max_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool3d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool3d(Tensor self, int[3] output_size) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & adaptive_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::adaptive_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_max_pool3d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & indices); // {"schema": "aten::adaptive_max_pool3d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override, Tensor & out); // {"schema": "aten::avg_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); // {"schema": "aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? 
divisor_override=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override, Tensor & grad_input); // {"schema": "aten::avg_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); // {"schema": "aten::avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override, Tensor & out); // {"schema": "aten::avg_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); // {"schema": "aten::avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override, Tensor & grad_input); // {"schema": "aten::avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool3d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); // {"schema": "aten::avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple fractional_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples, Tensor & output, Tensor & indices); // {"schema": "aten::fractional_max_pool2d.output(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple fractional_max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples); // {"schema": "aten::fractional_max_pool2d(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & fractional_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fractional_max_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices); // {"schema": "aten::fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple fractional_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples, Tensor & output, Tensor & indices); // {"schema": "aten::fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple fractional_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples); // {"schema": "aten::fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & fractional_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fractional_max_pool3d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices); // {"schema": "aten::fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple max_pool2d_with_indices_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out, Tensor & indices); // {"schema": "aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple max_pool2d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & max_pool2d_with_indices_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_pool2d_with_indices_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices); // {"schema": "aten::max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple max_pool3d_with_indices_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out, Tensor & indices); // {"schema": "aten::max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple max_pool3d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & max_pool3d_with_indices_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_pool3d_with_indices_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices); // {"schema": "aten::max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & max_unpool2d_out(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_unpool2d(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size); // {"schema": "aten::max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & max_unpool3d_out(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, IntArrayRef stride, IntArrayRef padding, Tensor & out); // {"schema": "aten::max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_unpool3d(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, IntArrayRef stride, IntArrayRef padding); // {"schema": "aten::max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad1d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad1d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad1d(Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad1d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad1d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad2d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad2d.out(Tensor self, SymInt[4] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad2d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad2d(Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad2d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad2d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad3d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad3d.out(Tensor self, SymInt[6] padding, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad3d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad3d(Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad3d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad3d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad1d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad1d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad1d(Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad1d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad1d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad2d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad2d.out(Tensor self, SymInt[4] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad2d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad2d(Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad2d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad2d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & replication_pad3d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad3d.out(Tensor self, SymInt[6] padding, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad3d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad3d(Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad3d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad3d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _pad_circular(const Tensor & self, c10::SymIntArrayRef pad); // {"schema": "aten::_pad_circular(Tensor self, SymInt[] pad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _pad_enum(const Tensor & self, c10::SymIntArrayRef pad, int64_t mode, ::std::optional value); // {"schema": "aten::_pad_enum(Tensor self, SymInt[] pad, int mode, float? value=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor pad(const Tensor & self, c10::SymIntArrayRef pad, c10::string_view mode, ::std::optional value); // {"schema": "aten::pad(Tensor self, SymInt[] pad, str mode=\"constant\", float? value=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_linear1d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::upsample_linear1d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_bilinear2d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::upsample_bilinear2d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_bilinear2d_aa(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::_upsample_bilinear2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_trilinear3d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::upsample_trilinear3d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_bicubic2d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::upsample_bicubic2d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_bicubic2d_aa(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); // {"schema": "aten::_upsample_bicubic2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? 
scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest1d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::upsample_nearest1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact1d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest2d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::upsample_nearest2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact2d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest3d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::upsample_nearest3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact3d(const Tensor & input, OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & upsample_linear1d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales, Tensor & out); // {"schema": "aten::upsample_linear1d.out(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_linear1d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales); // {"schema": "aten::upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_linear1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales, Tensor & grad_input); // {"schema": "aten::upsample_linear1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_linear1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales); // {"schema": "aten::upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bilinear2d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::upsample_bilinear2d.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bilinear2d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_bilinear2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bilinear2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_bilinear2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bilinear2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_bilinear2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bilinear2d_aa_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bilinear2d_aa(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bilinear2d_aa_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bilinear2d_aa_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bicubic2d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::upsample_bicubic2d.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bicubic2d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bicubic2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_bicubic2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bicubic2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bicubic2d_aa_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bicubic2d_aa(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bicubic2d_aa_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bicubic2d_aa_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_trilinear3d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::upsample_trilinear3d.out(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_trilinear3d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_trilinear3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_trilinear3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_trilinear3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest1d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales, Tensor & out); // {"schema": "aten::upsample_nearest1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact1d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales, Tensor & out); // {"schema": "aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest1d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales); // {"schema": "aten::upsample_nearest1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact1d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales); // {"schema": "aten::_upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales, Tensor & grad_input); // {"schema": "aten::upsample_nearest1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales); // {"schema": "aten::upsample_nearest1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales); // {"schema": "aten::_upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest2d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::upsample_nearest2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact2d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest2d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_nearest2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact2d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_nearest2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? 
scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest3d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::upsample_nearest3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact3d_out(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest3d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact3d(const Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_nearest3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? 
scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); // {"schema": "aten::_upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sigmoid_backward_out(const Tensor & grad_output, const Tensor & output, Tensor & grad_input); // {"schema": "aten::sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sigmoid_backward(const Tensor & grad_output, const Tensor & output); // {"schema": "aten::sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logit_backward_out(const Tensor & grad_output, const Tensor & self, ::std::optional eps, Tensor & grad_input); // {"schema": "aten::logit_backward.grad_input(Tensor grad_output, Tensor self, float? eps=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logit_backward(const Tensor & grad_output, const Tensor & self, ::std::optional eps); // {"schema": "aten::logit_backward(Tensor grad_output, Tensor self, float? eps=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tanh_backward_out(const Tensor & grad_output, const Tensor & output, Tensor & grad_input); // {"schema": "aten::tanh_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tanh_backward(const Tensor & grad_output, const Tensor & output); // {"schema": "aten::tanh_backward(Tensor grad_output, Tensor output) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & slow_conv_transpose2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor slow_conv_transpose2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & slow_conv_transpose3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? 
bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor slow_conv_transpose3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & thnn_conv2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor thnn_conv2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & _slow_conv2d_forward_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & output); // {"schema": "aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _slow_conv2d_forward(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::_slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _slow_conv2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & grad_input, Tensor & grad_weight, Tensor & grad_bias); // {"schema": "aten::_slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"} +::std::tuple _slow_conv2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array output_mask); // {"schema": "aten::_slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)", "dispatch": "True", "default": "False"} +const Tensor & _conv_depthwise2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, const Tensor & out); // {"schema": "aten::_conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _conv_depthwise2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::_conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor", "dispatch": "True", "default": "False"} +Tensor conv_depthwise3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & slow_conv3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor slow_conv3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & slow_conv3d_forward_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & output); // {"schema": "aten::slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor slow_conv3d_forward(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? 
bias, SymInt[3] stride, SymInt[3] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor slow_conv_dilated2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor slow_conv_dilated3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & col2im_out(const Tensor & self, c10::SymIntArrayRef output_size, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride, Tensor & out); // {"schema": "aten::col2im.out(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor col2im(const Tensor & self, c10::SymIntArrayRef output_size, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride); // {"schema": "aten::col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor", "dispatch": "True", "default": "False"} +Tensor column_stack(TensorList tensors); // {"schema": "aten::column_stack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & column_stack_out(TensorList tensors, Tensor & out); // {"schema": "aten::column_stack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & im2col_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride, Tensor & out); // {"schema": "aten::im2col.out(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor im2col(const Tensor & self, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride); // {"schema": "aten::im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor", "dispatch": "True", "default": "False"} +Tensor isfinite(const Tensor & self); // {"schema": "aten::isfinite(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor isinf(const Tensor & self); // {"schema": "aten::isinf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +void record_stream(Tensor & self, Stream s); // {"schema": "aten::record_stream(Tensor(a!) self, Stream s) -> ()", "dispatch": "True", "default": "False"} +Tensor isposinf(const Tensor & self); // {"schema": "aten::isposinf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isposinf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isposinf.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor isneginf(const Tensor & self); // {"schema": "aten::isneginf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isneginf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _add_batch_dim(const Tensor & self, int64_t batch_dim, int64_t level); // {"schema": "aten::_add_batch_dim(Tensor self, int batch_dim, int level) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _remove_batch_dim(const Tensor & self, int64_t level, int64_t batch_size, int64_t out_dim); // {"schema": "aten::_remove_batch_dim(Tensor self, int level, int batch_size, int out_dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_entr(const Tensor & self); // {"schema": "aten::special_entr(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_entr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_entr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_ndtri(const Tensor & self); // {"schema": "aten::special_ndtri(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_ndtri_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_ndtri.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_log_ndtr(const Tensor & self); // {"schema": "aten::special_log_ndtr(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_log_ndtr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_log_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_expm1(const Tensor & self); // {"schema": "aten::special_expm1(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_expm1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_exp2(const Tensor & self); // {"schema": "aten::special_exp2(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_exp2_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_psi(const Tensor & self); // {"schema": "aten::special_psi(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_psi_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_psi.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_digamma(const Tensor & self); // {"schema": "aten::special_digamma(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_digamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammaln(const Tensor & self); // {"schema": "aten::special_gammaln(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammaln_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_gammaln.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erf(const Tensor & self); // {"schema": "aten::special_erf(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erf_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erfc(const Tensor & self); // {"schema": "aten::special_erfc(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erfc_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erfcx(const Tensor & self); // {"schema": "aten::special_erfcx(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_erfcx_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfcx.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_erfinv(const Tensor & self); // {"schema": "aten::special_erfinv(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erfinv_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_ndtr(const Tensor & self); // {"schema": "aten::special_ndtr(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_ndtr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_xlog1py(const Tensor & self, const Tensor & other); // {"schema": "aten::special_xlog1py(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_xlog1py(const Scalar & self, const Tensor & other); // {"schema": "aten::special_xlog1py.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_xlog1py(const Tensor & self, const Scalar & other); // {"schema": "aten::special_xlog1py.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_xlog1py_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlog1py.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_xlog1py_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_xlog1py_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_xlog1py.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_xlogy(const Tensor & self, const Tensor & other); // {"schema": "aten::special_xlogy(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_xlogy(const Scalar & self, const Tensor & other); // {"schema": "aten::special_xlogy.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_xlogy(const Tensor & self, const Scalar & other); // {"schema": "aten::special_xlogy.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlogy.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlogy.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_xlogy.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_zeta(const Tensor & self, const Tensor & other); // {"schema": "aten::special_zeta(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_zeta(const Scalar & self, const Tensor & other); // {"schema": "aten::special_zeta.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_zeta(const Tensor & self, const Scalar & other); // {"schema": "aten::special_zeta.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_zeta_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_zeta.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_zeta_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_zeta_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_zeta.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_i0(const Tensor & self); // {"schema": "aten::special_i0(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_i0e(const Tensor & self); // {"schema": "aten::special_i0e(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i0e_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i0e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_i1(const Tensor & self); // {"schema": "aten::special_i1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i1.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_i1e(const Tensor & self); // {"schema": "aten::special_i1e(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i1e_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i1e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_logit(const Tensor & self, ::std::optional eps); // {"schema": "aten::special_logit(Tensor self, float? eps=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_logit_out(const Tensor & self, ::std::optional eps, Tensor & out); // {"schema": "aten::special_logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_polygamma(int64_t n, const Tensor & self); // {"schema": "aten::special_polygamma(int n, Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_polygamma_out(int64_t n, const Tensor & self, Tensor & out); // {"schema": "aten::special_polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_logsumexp(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::special_logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_logsumexp_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::special_logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_expit(const Tensor & self); // {"schema": "aten::special_expit(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_expit_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_expit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_sinc(const Tensor & self); // {"schema": "aten::special_sinc(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_sinc_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_sinc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_round(const Tensor & self, int64_t decimals); // {"schema": "aten::special_round(Tensor self, *, int decimals=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_round_out(const Tensor & self, int64_t decimals, Tensor & out); // {"schema": "aten::special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_log1p(const Tensor & self); // {"schema": "aten::special_log1p(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_log1p_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_log_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::special_log_softmax(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammainc_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_gammainc.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammainc(const Tensor & self, const Tensor & other); // {"schema": "aten::special_gammainc(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammaincc_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_gammaincc.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammaincc(const Tensor & self, const Tensor & other); // {"schema": "aten::special_gammaincc(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_multigammaln(const Tensor & self, int64_t p); // {"schema": "aten::special_multigammaln(Tensor self, int p) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_multigammaln_out(const Tensor & self, int64_t p, Tensor & out); // {"schema": "aten::special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fft_fft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_fft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ifft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_ifft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ifft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_ifft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_rfft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_rfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_rfft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_rfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_irfft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_irfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_irfft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_irfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_hfft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? 
norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_hfft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ihfft(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm); // {"schema": "aten::fft_ihfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ihfft_out(const Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_ihfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_fft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_fft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_fft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_fft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ifft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ifft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_rfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_rfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_rfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_rfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_irfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_irfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_hfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +const Tensor & fft_hfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, const Tensor & out); // {"schema": "aten::fft_hfft2.out(Tensor self, SymInt[1]? 
s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ihfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_ihfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +const Tensor & fft_ihfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, ::std::optional norm, const Tensor & out); // {"schema": "aten::fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_fftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_fftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_fftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_fftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ifftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_ifftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ifftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_ifftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_rfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_rfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_rfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_rfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_irfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_irfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_irfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, Tensor & out); // {"schema": "aten::fft_irfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_hfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +const Tensor & fft_hfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, const Tensor & out); // {"schema": "aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ihfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm); // {"schema": "aten::fft_ihfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +const Tensor & fft_ihfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, ::std::optional norm, const Tensor & out); // {"schema": "aten::fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_fftfreq(int64_t n, double d, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fft_fftfreq_out(int64_t n, double d, Tensor & out); // {"schema": "aten::fft_fftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor fft_rfftfreq(int64_t n, double d, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::fft_rfftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fft_rfftfreq_out(int64_t n, double d, Tensor & out); // {"schema": "aten::fft_rfftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor fft_fftshift(const Tensor & self, OptionalIntArrayRef dim); // {"schema": "aten::fft_fftshift(Tensor self, int[1]? dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fft_ifftshift(const Tensor & self, OptionalIntArrayRef dim); // {"schema": "aten::fft_ifftshift(Tensor self, int[1]? dim=None) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple linalg_cholesky_ex(const Tensor & self, bool upper, bool check_errors); // {"schema": "aten::linalg_cholesky_ex(Tensor self, *, bool upper=False, bool check_errors=False) -> (Tensor L, Tensor info)", "dispatch": "True", "default": "True"} +::std::tuple linalg_cholesky_ex_out(const Tensor & self, bool upper, bool check_errors, Tensor & L, Tensor & info); // {"schema": "aten::linalg_cholesky_ex.L(Tensor self, *, bool upper=False, bool check_errors=False, Tensor(a!) L, Tensor(b!) info) -> (Tensor(a!) L, Tensor(b!) info)", "dispatch": "True", "default": "False"} +Tensor linalg_cholesky(const Tensor & self, bool upper); // {"schema": "aten::linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_cholesky_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::linalg_cholesky.out(Tensor self, *, bool upper=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_cross(const Tensor & self, const Tensor & other, int64_t dim); // {"schema": "aten::linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linalg_cross_out(const Tensor & self, const Tensor & other, int64_t dim, Tensor & out); // {"schema": "aten::linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple linalg_lu_factor(const Tensor & A, bool pivot); // {"schema": "aten::linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)", "dispatch": "False", "default": "True"} +::std::tuple linalg_lu_factor_out(const Tensor & A, bool pivot, Tensor & LU, Tensor & pivots); // {"schema": "aten::linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)", "dispatch": "False", "default": "True"} +::std::tuple linalg_lu_factor_ex(const Tensor & A, bool pivot, bool check_errors); // {"schema": "aten::linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"} +::std::tuple linalg_lu_factor_ex_out(const Tensor & A, bool pivot, bool check_errors, Tensor & LU, Tensor & pivots, Tensor & info); // {"schema": "aten::linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)", "dispatch": "True", "default": "False"} +::std::tuple linalg_lu(const Tensor & A, bool pivot); // {"schema": "aten::linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)", "dispatch": "True", "default": "True"} +::std::tuple linalg_lu_out(const Tensor & A, bool pivot, Tensor & P, Tensor & L, Tensor & U); // {"schema": "aten::linalg_lu.out(Tensor A, *, bool pivot=True, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)", "dispatch": "True", "default": "False"} +Tensor linalg_lu_solve(const Tensor & LU, const Tensor & pivots, const Tensor & B, bool left, bool adjoint); // {"schema": "aten::linalg_lu_solve(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linalg_lu_solve_out(const Tensor & LU, const Tensor & pivots, const Tensor & B, bool left, bool adjoint, Tensor & out); // {"schema": "aten::linalg_lu_solve.out(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple _linalg_det(const Tensor & A); // {"schema": "aten::_linalg_det(Tensor A) -> (Tensor result, Tensor LU, Tensor pivots)", "dispatch": "True", "default": "True"} +::std::tuple _linalg_det_out(const Tensor & A, Tensor & result, Tensor & LU, Tensor & pivots); // {"schema": "aten::_linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)", "dispatch": "True", "default": "False"} +Tensor linalg_det(const Tensor & A); // {"schema": "aten::linalg_det(Tensor A) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_det_out(const Tensor & A, Tensor & out); // {"schema": "aten::linalg_det.out(Tensor A, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor det(const Tensor & self); // {"schema": "aten::det(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple linalg_ldl_factor_ex(const Tensor & self, bool hermitian, bool check_errors); // {"schema": "aten::linalg_ldl_factor_ex(Tensor self, *, bool hermitian=False, bool check_errors=False) -> (Tensor LD, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"} +::std::tuple linalg_ldl_factor_ex_out(const Tensor & self, bool hermitian, bool check_errors, Tensor & LD, Tensor & pivots, Tensor & info); // {"schema": "aten::linalg_ldl_factor_ex.out(Tensor self, *, bool hermitian=False, bool check_errors=False, Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info)", "dispatch": "True", "default": "False"} +::std::tuple linalg_ldl_factor(const Tensor & self, bool hermitian); // {"schema": "aten::linalg_ldl_factor(Tensor self, *, bool hermitian=False) -> (Tensor LD, Tensor pivots)", "dispatch": "False", "default": "True"} +::std::tuple linalg_ldl_factor_out(const Tensor & self, bool hermitian, Tensor & LD, Tensor & pivots); // {"schema": "aten::linalg_ldl_factor.out(Tensor self, *, bool hermitian=False, Tensor(a!) LD, Tensor(b!) pivots) -> (Tensor(a!) LD, Tensor(b!) pivots)", "dispatch": "False", "default": "True"} +Tensor linalg_ldl_solve(const Tensor & LD, const Tensor & pivots, const Tensor & B, bool hermitian); // {"schema": "aten::linalg_ldl_solve(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linalg_ldl_solve_out(const Tensor & LD, const Tensor & pivots, const Tensor & B, bool hermitian, Tensor & out); // {"schema": "aten::linalg_ldl_solve.out(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple linalg_lstsq(const Tensor & self, const Tensor & b, ::std::optional rcond, ::std::optional driver); // {"schema": "aten::linalg_lstsq(Tensor self, Tensor b, float? rcond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)", "dispatch": "True", "default": "True"} +::std::tuple linalg_lstsq_out(const Tensor & self, const Tensor & b, ::std::optional rcond, ::std::optional driver, Tensor & solution, Tensor & residuals, Tensor & rank, Tensor & singular_values); // {"schema": "aten::linalg_lstsq.out(Tensor self, Tensor b, float? rcond=None, *, str? driver=None, Tensor(a!) solution, Tensor(b!) residuals, Tensor(c!) rank, Tensor(d!) singular_values) -> (Tensor(a!) solution, Tensor(b!) residuals, Tensor(c!) rank, Tensor(d!) singular_values)", "dispatch": "True", "default": "False"} +Tensor linalg_matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::linalg_matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_vecdot(const Tensor & x, const Tensor & y, int64_t dim); // {"schema": "aten::linalg_vecdot(Tensor x, Tensor y, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_vecdot_out(const Tensor & x, const Tensor & y, int64_t dim, Tensor & out); // {"schema": "aten::linalg_vecdot.out(Tensor x, Tensor y, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_exp(const Tensor & self); // {"schema": "aten::linalg_matrix_exp(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _linalg_slogdet(const Tensor & A); // {"schema": "aten::_linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet, Tensor LU, Tensor pivots)", "dispatch": "True", "default": "True"} +::std::tuple _linalg_slogdet_out(const Tensor & A, Tensor & sign, Tensor & logabsdet, Tensor & LU, Tensor & pivots); // {"schema": "aten::_linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)", "dispatch": "True", "default": "False"} +::std::tuple linalg_slogdet(const Tensor & A); // {"schema": "aten::linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)", "dispatch": "False", "default": "True"} +::std::tuple linalg_slogdet_out(const Tensor & A, Tensor & sign, Tensor & logabsdet); // {"schema": "aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet)", "dispatch": "False", "default": "True"} +::std::tuple slogdet(const Tensor & self); // {"schema": "aten::slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)", "dispatch": "False", "default": "True"} +::std::tuple slogdet_out(const Tensor & self, Tensor & sign, Tensor & logabsdet); // {"schema": "aten::slogdet.out(Tensor self, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet)", "dispatch": "False", "default": "True"} +Tensor logdet(const Tensor & self); // {"schema": "aten::logdet(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple linalg_eig(const Tensor & self); // {"schema": "aten::linalg_eig(Tensor self) -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "True", "default": "False"} +::std::tuple linalg_eig_out(const Tensor & self, Tensor & eigenvalues, Tensor & eigenvectors); // {"schema": "aten::linalg_eig.out(Tensor self, *, Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "True", "default": "False"} +Tensor _linalg_eigvals(const Tensor & self); // {"schema": "aten::_linalg_eigvals(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor linalg_eigvals(const Tensor & self); // {"schema": "aten::linalg_eigvals(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_eigvals_out(const Tensor & self, Tensor & out); // {"schema": "aten::linalg_eigvals.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple _linalg_eigh(const Tensor & A, c10::string_view UPLO, bool compute_v); // {"schema": "aten::_linalg_eigh(Tensor A, str UPLO=\"L\", bool compute_v=True) -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "True", "default": "True"} +::std::tuple _linalg_eigh_out(const Tensor & A, c10::string_view UPLO, bool compute_v, Tensor & eigenvalues, Tensor & eigenvectors); // {"schema": "aten::_linalg_eigh.eigenvalues(Tensor A, str UPLO=\"L\", bool compute_v=True, *, Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "True", "default": "False"} +::std::tuple linalg_eigh(const Tensor & self, c10::string_view UPLO); // {"schema": "aten::linalg_eigh(Tensor self, str UPLO=\"L\") -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "False", "default": "True"} +::std::tuple linalg_eigh_out(const Tensor & self, c10::string_view UPLO, Tensor & eigvals, Tensor & eigvecs); // {"schema": "aten::linalg_eigh.eigvals(Tensor self, str UPLO=\"L\", *, Tensor(a!) eigvals, Tensor(b!) eigvecs) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "False", "default": "True"} +Tensor linalg_eigvalsh(const Tensor & self, c10::string_view UPLO); // {"schema": "aten::linalg_eigvalsh(Tensor self, str UPLO=\"L\") -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_eigvalsh_out(const Tensor & self, c10::string_view UPLO, Tensor & out); // {"schema": "aten::linalg_eigvalsh.out(Tensor self, str UPLO=\"L\", *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_householder_product(const Tensor & input, const Tensor & tau); // {"schema": "aten::linalg_householder_product(Tensor input, Tensor tau) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & linalg_householder_product_out(const Tensor & input, const Tensor & tau, Tensor & out); // {"schema": "aten::linalg_householder_product.out(Tensor input, Tensor tau, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple linalg_inv_ex(const Tensor & A, bool check_errors); // {"schema": "aten::linalg_inv_ex(Tensor A, *, bool check_errors=False) -> (Tensor inverse, Tensor info)", "dispatch": "True", "default": "True"} +::std::tuple linalg_inv_ex_out(const Tensor & A, bool check_errors, Tensor & inverse, Tensor & info); // {"schema": "aten::linalg_inv_ex.inverse(Tensor A, *, bool check_errors=False, Tensor(a!) inverse, Tensor(b!) info) -> (Tensor(a!) inverse, Tensor(b!) info)", "dispatch": "True", "default": "False"} +Tensor linalg_inv(const Tensor & A); // {"schema": "aten::linalg_inv(Tensor A) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_inv_out(const Tensor & A, Tensor & out); // {"schema": "aten::linalg_inv.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor inverse(const Tensor & self); // {"schema": "aten::inverse(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & inverse_out(const Tensor & self, Tensor & out); // {"schema": "aten::inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor inner(const Tensor & self, const Tensor & other); // {"schema": "aten::inner(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & inner_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::inner.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor outer(const Tensor & self, const Tensor & vec2); // {"schema": "aten::outer(Tensor self, Tensor vec2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & outer_out(const Tensor & self, const Tensor & vec2, Tensor & out); // {"schema": "aten::outer.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor ger(const Tensor & self, const Tensor & vec2); // {"schema": "aten::ger(Tensor self, Tensor vec2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & ger_out(const Tensor & self, const Tensor & vec2, Tensor & out); // {"schema": "aten::ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_norm(const Tensor & self, const ::std::optional & ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor linalg_norm(const Tensor & self, c10::string_view ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_norm_out(const Tensor & self, const ::std::optional & ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & linalg_norm_out(const Tensor & self, c10::string_view ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_vector_norm(const Tensor & self, const Scalar & ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::linalg_vector_norm(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linalg_vector_norm_out(const Tensor & self, const Scalar & ord, OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::linalg_vector_norm.out(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor linalg_matrix_norm(const Tensor & self, const Scalar & ord, IntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_norm_out(const Tensor & self, const Scalar & ord, IntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::linalg_matrix_norm.out(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_norm(const Tensor & self, c10::string_view ord, IntArrayRef dim, bool keepdim, ::std::optional dtype); // {"schema": "aten::linalg_matrix_norm.str_ord(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_norm_out(const Tensor & self, c10::string_view ord, IntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple _linalg_svd(const Tensor & A, bool full_matrices, bool compute_uv, ::std::optional driver); // {"schema": "aten::_linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True, *, str? driver=None) -> (Tensor U, Tensor S, Tensor Vh)", "dispatch": "True", "default": "True"} +::std::tuple _linalg_svd_out(const Tensor & A, bool full_matrices, bool compute_uv, ::std::optional driver, Tensor & U, Tensor & S, Tensor & Vh); // {"schema": "aten::_linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, str? driver=None, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)", "dispatch": "True", "default": "False"} +::std::tuple linalg_svd(const Tensor & A, bool full_matrices, ::std::optional driver); // {"schema": "aten::linalg_svd(Tensor A, bool full_matrices=True, *, str? driver=None) -> (Tensor U, Tensor S, Tensor Vh)", "dispatch": "False", "default": "True"} +::std::tuple linalg_svd_out(const Tensor & A, bool full_matrices, ::std::optional driver, Tensor & U, Tensor & S, Tensor & Vh); // {"schema": "aten::linalg_svd.U(Tensor A, bool full_matrices=True, *, str? driver=None, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)", "dispatch": "False", "default": "True"} +Tensor linalg_svdvals(const Tensor & A, ::std::optional driver); // {"schema": "aten::linalg_svdvals(Tensor A, *, str? driver=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_svdvals_out(const Tensor & A, ::std::optional driver, Tensor & out); // {"schema": "aten::linalg_svdvals.out(Tensor A, *, str? driver=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_cond(const Tensor & self, const ::std::optional & p); // {"schema": "aten::linalg_cond(Tensor self, Scalar? p=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_cond_out(const Tensor & self, const ::std::optional & p, Tensor & out); // {"schema": "aten::linalg_cond.out(Tensor self, Scalar? p=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_cond(const Tensor & self, c10::string_view p); // {"schema": "aten::linalg_cond.p_str(Tensor self, str p) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_cond_out(const Tensor & self, c10::string_view p, Tensor & out); // {"schema": "aten::linalg_cond.p_str_out(Tensor self, str p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_pinv(const Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); // {"schema": "aten::linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? 
rtol=None, bool hermitian=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & linalg_pinv_out(const Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor linalg_pinv(const Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); // {"schema": "aten::linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_pinv_out(const Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_pinv(const Tensor & self, double rcond, bool hermitian); // {"schema": "aten::linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor linalg_pinv(const Tensor & self, const Tensor & rcond, bool hermitian); // {"schema": "aten::linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_pinv_out(const Tensor & self, double rcond, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & linalg_pinv_out(const Tensor & self, const Tensor & rcond, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple _linalg_solve_ex(const Tensor & A, const Tensor & B, bool left, bool check_errors); // {"schema": "aten::_linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor LU, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"} +::std::tuple _linalg_solve_ex_out(const Tensor & A, const Tensor & B, bool left, bool check_errors, Tensor & result, Tensor & LU, Tensor & pivots, Tensor & info); // {"schema": "aten::_linalg_solve_ex.result(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots, Tensor(d!) info) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots, Tensor(d!) info)", "dispatch": "True", "default": "False"} +::std::tuple linalg_solve_ex(const Tensor & A, const Tensor & B, bool left, bool check_errors); // {"schema": "aten::linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)", "dispatch": "False", "default": "True"} +::std::tuple linalg_solve_ex_out(const Tensor & A, const Tensor & B, bool left, bool check_errors, Tensor & result, Tensor & info); // {"schema": "aten::linalg_solve_ex.out(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) info) -> (Tensor(a!) result, Tensor(b!) 
info)", "dispatch": "False", "default": "True"} +Tensor linalg_solve(const Tensor & A, const Tensor & B, bool left); // {"schema": "aten::linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _spsolve(const Tensor & A, const Tensor & B, bool left); // {"schema": "aten::_spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & linalg_solve_out(const Tensor & A, const Tensor & B, bool left, Tensor & out); // {"schema": "aten::linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_tensorinv(const Tensor & self, int64_t ind); // {"schema": "aten::linalg_tensorinv(Tensor self, int ind=2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_tensorinv_out(const Tensor & self, int64_t ind, Tensor & out); // {"schema": "aten::linalg_tensorinv.out(Tensor self, int ind=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_tensorsolve(const Tensor & self, const Tensor & other, OptionalIntArrayRef dims); // {"schema": "aten::linalg_tensorsolve(Tensor self, Tensor other, int[]? dims=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_tensorsolve_out(const Tensor & self, const Tensor & other, OptionalIntArrayRef dims, Tensor & out); // {"schema": "aten::linalg_tensorsolve.out(Tensor self, Tensor other, int[]? dims=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple linalg_qr(const Tensor & A, c10::string_view mode); // {"schema": "aten::linalg_qr(Tensor A, str mode='reduced') -> (Tensor Q, Tensor R)", "dispatch": "True", "default": "True"} +::std::tuple linalg_qr_out(const Tensor & A, c10::string_view mode, Tensor & Q, Tensor & R); // {"schema": "aten::linalg_qr.out(Tensor A, str mode='reduced', *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)", "dispatch": "True", "default": "False"} +Tensor linalg_matrix_power(const Tensor & self, int64_t n); // {"schema": "aten::linalg_matrix_power(Tensor self, int n) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_power_out(const Tensor & self, int64_t n, Tensor & out); // {"schema": "aten::linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_rank(const Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_rank_out(const Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_rank(const Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? 
rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_rank_out(const Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_rank(const Tensor & self, double tol, bool hermitian); // {"schema": "aten::linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_rank_out(const Tensor & self, double tol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_matrix_rank(const Tensor & input, const Tensor & tol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_matrix_rank_out(const Tensor & input, const Tensor & tol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor linalg_multi_dot(TensorList tensors); // {"schema": "aten::linalg_multi_dot(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & linalg_multi_dot_out(TensorList tensors, Tensor & out); // {"schema": "aten::linalg_multi_dot.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nested_to_padded_tensor(const Tensor & self, double padding, OptionalIntArrayRef output_size); // {"schema": "aten::nested_to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_serialization_subcmul(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_parallel_materialize(const Tensor & self, int64_t num_parallel, bool skip_first); // {"schema": "aten::_test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _test_optional_intlist(const Tensor & values, OptionalIntArrayRef addends); // {"schema": "aten::_test_optional_intlist(Tensor values, int[]? addends) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _test_optional_filled_intlist(const Tensor & values, OptionalIntArrayRef addends); // {"schema": "aten::_test_optional_filled_intlist(Tensor values, int[2]? addends) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _test_optional_floatlist(const Tensor & values, ::std::optional> addends); // {"schema": "aten::_test_optional_floatlist(Tensor values, float[]? 
addends) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _test_string_default(const Tensor & dummy, c10::string_view a, c10::string_view b); // {"schema": "aten::_test_string_default(Tensor dummy, str a=\"\\\"'\\\\\", str b='\"\\'\\\\') -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_ambiguous_defaults(const Tensor & dummy, int64_t a, int64_t b); // {"schema": "aten::_test_ambiguous_defaults.a(Tensor dummy, int a=1, int b=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_ambiguous_defaults(const Tensor & dummy, int64_t a, c10::string_view b); // {"schema": "aten::_test_ambiguous_defaults.b(Tensor dummy, int a=2, str b=\"2\") -> Tensor", "dispatch": "False", "default": "True"} +Tensor _test_warn_in_autograd(const Tensor & self); // {"schema": "aten::_test_warn_in_autograd(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _test_autograd_multiple_dispatch(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch.fullcoverage(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _test_autograd_multiple_dispatch(const Tensor & self, bool b); // {"schema": "aten::_test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _test_autograd_multiple_dispatch_view(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor _test_autograd_multiple_dispatch_view_copy(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch_view_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor segment_reduce(const Tensor & data, c10::string_view reduce, const ::std::optional & lengths, const ::std::optional & indices, const ::std::optional & offsets, int64_t axis, bool unsafe, const ::std::optional & initial); // {"schema": "aten::segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, Tensor? offsets=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _segment_reduce_backward(const Tensor & grad, const Tensor & output, const Tensor & data, c10::string_view reduce, const ::std::optional & lengths, const ::std::optional & offsets, int64_t axis, const ::std::optional & initial); // {"schema": "aten::_segment_reduce_backward(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor pad_sequence(TensorList sequences, bool batch_first, double padding_value, c10::string_view padding_side); // {"schema": "aten::pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side=\"right\") -> Tensor", "dispatch": "False", "default": "True"} +Tensor flatten_dense_tensors(TensorList tensors); // {"schema": "aten::flatten_dense_tensors(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector unflatten_dense_tensors(const Tensor & flat, TensorList tensors); // {"schema": "aten::unflatten_dense_tensors(Tensor flat, Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor _nested_tensor_from_tensor_list(TensorList list, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); // {"schema": "aten::_nested_tensor_from_tensor_list(Tensor[] list, ScalarType? dtype=None, Layout? 
layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _fw_primal_copy(const Tensor & self, int64_t level); // {"schema": "aten::_fw_primal_copy(Tensor self, int level) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _make_dual_copy(const Tensor & primal, const Tensor & tangent, int64_t level); // {"schema": "aten::_make_dual_copy(Tensor primal, Tensor tangent, int level) -> Tensor", "dispatch": "True", "default": "True"} +Tensor view_as_real_copy(const Tensor & self); // {"schema": "aten::view_as_real_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor view_as_complex_copy(const Tensor & self); // {"schema": "aten::view_as_complex_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _conj_copy(const Tensor & self); // {"schema": "aten::_conj_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _neg_view_copy(const Tensor & self); // {"schema": "aten::_neg_view_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor as_strided_copy(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); // {"schema": "aten::as_strided_copy(Tensor self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _sparse_broadcast_to_copy(const Tensor & self, IntArrayRef size); // {"schema": "aten::_sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor", "dispatch": "True", "default": "True"} +Tensor diagonal_copy(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor expand_copy(const Tensor & self, c10::SymIntArrayRef size, bool implicit); // {"schema": "aten::expand_copy(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor permute_copy(const Tensor & self, IntArrayRef dims); // {"schema": "aten::permute_copy(Tensor self, int[] dims) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _reshape_alias_copy(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::_reshape_alias_copy(Tensor self, SymInt[] size, SymInt[] stride) -> Tensor", "dispatch": "True", "default": "True"} +Tensor select_copy(const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::select_copy.int(Tensor self, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"} +Tensor detach_copy(const Tensor & self); // {"schema": "aten::detach_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor slice_copy(const Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); // {"schema": "aten::slice_copy.Tensor(Tensor self, int dim=0, SymInt? start=None, SymInt? 
end=None, SymInt step=1) -> Tensor", "dispatch": "True", "default": "True"} +::std::vector split_copy(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::split_copy.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector split_with_sizes_copy(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::split_with_sizes_copy(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"} +Tensor squeeze_copy(const Tensor & self); // {"schema": "aten::squeeze_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor squeeze_copy(const Tensor & self, int64_t dim); // {"schema": "aten::squeeze_copy.dim(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor squeeze_copy(const Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze_copy.dims(Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor t_copy(const Tensor & self); // {"schema": "aten::t_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor transpose_copy(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose_copy.int(Tensor self, int dim0, int dim1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor unsqueeze_copy(const Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze_copy(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _indices_copy(const Tensor & self); // {"schema": "aten::_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _values_copy(const Tensor & self); // {"schema": "aten::_values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor indices_copy(const Tensor & self); // {"schema": "aten::indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor values_copy(const Tensor & self); // {"schema": "aten::values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor crow_indices_copy(const Tensor & self); // {"schema": "aten::crow_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor col_indices_copy(const Tensor & self); // {"schema": "aten::col_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor ccol_indices_copy(const Tensor & self); // {"schema": "aten::ccol_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor row_indices_copy(const Tensor & self); // {"schema": "aten::row_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +::std::vector unbind_copy(const Tensor & self, int64_t dim); // {"schema": "aten::unbind_copy.int(Tensor self, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"} +void unbind_copy_out(const Tensor & self, int64_t dim, TensorList out); // {"schema": "aten::unbind_copy.int_out(Tensor self, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void split_copy_out(const Tensor & self, c10::SymInt split_size, int64_t dim, TensorList out); // {"schema": "aten::split_copy.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void split_with_sizes_copy_out(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim, TensorList out); // {"schema": "aten::split_with_sizes_copy.out(Tensor self, SymInt[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()", 
"dispatch": "True", "default": "True"} +Tensor view_copy(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::view_copy(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"} +Tensor view_copy(const Tensor & self, ScalarType dtype); // {"schema": "aten::view_copy.dtype(Tensor self, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor unfold_copy(const Tensor & self, int64_t dimension, int64_t size, int64_t step); // {"schema": "aten::unfold_copy(Tensor self, int dimension, int size, int step) -> Tensor", "dispatch": "True", "default": "True"} +Tensor alias_copy(const Tensor & self); // {"schema": "aten::alias_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor to_padded_tensor(const Tensor & self, double padding, OptionalSymIntArrayRef output_size); // {"schema": "aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _jagged_to_padded_dense_forward(const Tensor & values, TensorList offsets, c10::SymIntArrayRef max_lengths, double padding_value); // {"schema": "aten::_jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _padded_dense_to_jagged_forward(const Tensor & dense, TensorList offsets, ::std::optional total_L); // {"schema": "aten::_padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_softmax_with_shape(const Tensor & self, const Tensor & query); // {"schema": "aten::_nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _safe_softmax(const Tensor & self, int64_t dim, ::std::optional dtype); // {"schema": "aten::_safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _transformer_encoder_layer_fwd(const Tensor & src, int64_t embed_dim, int64_t num_heads, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const Tensor & norm_weight_1, const Tensor & norm_bias_1, const Tensor & norm_weight_2, const Tensor & norm_bias_2, const Tensor & ffn_weight_1, const Tensor & ffn_bias_1, const Tensor & ffn_weight_2, const Tensor & ffn_bias_2, const ::std::optional & mask, ::std::optional mask_type); // {"schema": "aten::_transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? 
mask_type=None) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _native_multi_head_attention(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const ::std::optional & mask, bool need_weights, bool average_attn_weights, ::std::optional mask_type); // {"schema": "aten::_native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor scaled_dot_product_attention(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_mask, double dropout_p, bool is_causal, ::std::optional scale, bool enable_gqa); // {"schema": "aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor", "dispatch": "False", "default": "True"} +int64_t _fused_sdp_choice(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_mask, double dropout_p, bool is_causal, ::std::optional scale, bool enable_gqa); // {"schema": "aten::_fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_attention_math(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_mask, double dropout_p, bool is_causal, const ::std::optional & dropout_mask, ::std::optional scale, bool enable_gqa); // {"schema": "aten::_scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _scaled_dot_product_attention_math_for_mps(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_mask, double dropout_p, bool is_causal, const ::std::optional & dropout_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_flash_attention(const Tensor & query, const Tensor & key, const Tensor & value, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? 
scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_flash_attention_for_cpu(const Tensor & query, const Tensor & key, const Tensor & value, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_fused_attention_overrideable(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_bias, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "True"} +::std::tuple _scaled_dot_product_flash_attention_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const Tensor & philox_seed, const Tensor & philox_offset, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_flash_attention_for_cpu_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? 
scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_fused_attention_overrideable_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & attn_bias, ::std::array grad_input_mask, const Tensor & out, const Tensor & logsumexp, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const Tensor & philox_seed, const Tensor & philox_offset, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)", "dispatch": "True", "default": "True"} +::std::tuple _scaled_dot_product_efficient_attention(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_bias, bool compute_log_sumexp, double dropout_p, bool is_causal, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_efficient_attention_backward(const Tensor & grad_out_, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & attn_bias, const Tensor & out, const Tensor & logsumexp, const Tensor & philox_seed, const Tensor & philox_offset, double dropout_p, ::std::array grad_input_mask, bool is_causal, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor attn_bias, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, float dropout_p, bool[4] grad_input_mask, bool is_causal=False, *, float? scale=None) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_cudnn_attention(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & attn_bias, bool compute_log_sumexp, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? 
scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_dot_product_cudnn_attention_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, const Tensor & philox_seed, const Tensor & philox_offset, const Tensor & attn_bias, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, ::std::optional scale); // {"schema": "aten::_scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _flash_attention_forward(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & cum_seq_q, const ::std::optional & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, bool return_debug_mask, ::std::optional scale, ::std::optional window_size_left, ::std::optional window_size_right, const ::std::optional & seqused_k, const ::std::optional & alibi_slopes); // {"schema": "aten::_flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "False"} +::std::tuple _flash_attention_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const Tensor & philox_seed, const Tensor & philox_offset, ::std::optional scale, ::std::optional window_size_left, ::std::optional window_size_right); // {"schema": "aten::_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _efficient_attention_forward(const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & bias, const ::std::optional & cu_seqlens_q, const ::std::optional & cu_seqlens_k, ::std::optional max_seqlen_q, ::std::optional max_seqlen_k, double dropout_p, int64_t custom_mask_type, bool compute_log_sumexp, ::std::optional scale, const ::std::optional & seqlen_k, ::std::optional window_size); // {"schema": "aten::_efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt? max_seqlen_q, SymInt? 
max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? seqlen_k=None, int? window_size=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)", "dispatch": "True", "default": "False"} +::std::tuple _efficient_attention_backward(const Tensor & grad_out_, const Tensor & query, const Tensor & key, const Tensor & value, const ::std::optional & bias, const Tensor & out, const ::std::optional & cu_seqlens_q, const ::std::optional & cu_seqlens_k, c10::SymInt max_seqlen_q, c10::SymInt max_seqlen_k, const Tensor & logsumexp, double dropout_p, const Tensor & philox_seed, const Tensor & philox_offset, int64_t custom_mask_type, bool bias_requires_grad, ::std::optional scale, ::std::optional num_splits_key, ::std::optional window_size, bool shared_storage_dqdkdv); // {"schema": "aten::_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None, int? window_size=None, bool shared_storage_dqdkdv=False) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _triton_scaled_dot_attention(const Tensor & q, const Tensor & k, const Tensor & v, double dropout_p); // {"schema": "aten::_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _fill_mem_eff_dropout_mask_(Tensor & self, double dropout_p, int64_t seed, int64_t offset); // {"schema": "aten::_fill_mem_eff_dropout_mask_(Tensor(a!) self, float dropout_p, int seed, int offset) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _triton_multi_head_attention(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const ::std::optional & mask); // {"schema": "aten::_triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor special_airy_ai(const Tensor & x); // {"schema": "aten::special_airy_ai(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_airy_ai_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_airy_ai.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_bessel_j0(const Tensor & self); // {"schema": "aten::special_bessel_j0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_bessel_j0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_bessel_j1(const Tensor & self); // {"schema": "aten::special_bessel_j1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_bessel_j1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_j1.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_bessel_y0(const Tensor & self); // {"schema": "aten::special_bessel_y0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_bessel_y0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_bessel_y1(const Tensor & self); // {"schema": "aten::special_bessel_y1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_bessel_y1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_chebyshev_polynomial_t(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_t(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_t(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_t_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_chebyshev_polynomial_t_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_t_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_u(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_u(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_u(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_u_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_chebyshev_polynomial_u_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_u_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_v(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_v(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_v(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_v_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_chebyshev_polynomial_v_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_v_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_w(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_w(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_chebyshev_polynomial_w(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_w_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_chebyshev_polynomial_w_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_chebyshev_polynomial_w_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_h(const Tensor & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_h(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_h(const Scalar & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_h(const Tensor & x, const Scalar & n); // {"schema": "aten::special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_hermite_polynomial_h_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_hermite_polynomial_h_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_hermite_polynomial_h_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_he(const Tensor & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_he(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_he(const Scalar & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_hermite_polynomial_he(const Tensor & x, const Scalar & n); // {"schema": "aten::special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_hermite_polynomial_he_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_hermite_polynomial_he_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_hermite_polynomial_he_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_laguerre_polynomial_l(const Tensor & x, const Tensor & n); // {"schema": "aten::special_laguerre_polynomial_l(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_laguerre_polynomial_l(const Scalar & x, const Tensor & n); // {"schema": "aten::special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_laguerre_polynomial_l(const Tensor & x, const Scalar & n); // {"schema": "aten::special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_laguerre_polynomial_l_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_laguerre_polynomial_l_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_laguerre_polynomial_l_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Tensor & x, const Tensor & n); // {"schema": "aten::special_legendre_polynomial_p(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Scalar & x, const Tensor & n); // {"schema": "aten::special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Tensor & x, const Scalar & n); // {"schema": "aten::special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_legendre_polynomial_p_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_legendre_polynomial_p_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_legendre_polynomial_p_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_modified_bessel_i0(const Tensor & self); // {"schema": "aten::special_modified_bessel_i0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_i1(const Tensor & self); // {"schema": "aten::special_modified_bessel_i1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_i1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_k0(const Tensor & self); // {"schema": "aten::special_modified_bessel_k0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_k0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_k1(const Tensor & self); // {"schema": "aten::special_modified_bessel_k1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_k1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_scaled_modified_bessel_k0(const Tensor & x); // {"schema": "aten::special_scaled_modified_bessel_k0(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_scaled_modified_bessel_k0_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_scaled_modified_bessel_k1(const Tensor & x); // {"schema": "aten::special_scaled_modified_bessel_k1(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_scaled_modified_bessel_k1_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_shifted_chebyshev_polynomial_t(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_t(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_t(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_spherical_bessel_j0(const Tensor & x); // {"schema": "aten::special_spherical_bessel_j0(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_spherical_bessel_j0_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _foobar(const Tensor & self, bool arg1, bool arg2, bool arg3); // {"schema": "aten::_foobar(Tensor self, bool arg1=True, bool arg2=True, *, bool arg3=True) -> Tensor", "dispatch": "True", "default": "False"} +void _fused_adam_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adam_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adamw_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adamw_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_sgd_(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_sgd_(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, const Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adagrad_(TensorList self, TensorList grads, TensorList state_sums, TensorList state_steps, double lr, double lr_decay, double weight_decay, double eps, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _propagate_xla_data(const Tensor & input, const Tensor & output); // {"schema": "aten::_propagate_xla_data(Tensor input, Tensor output) -> ()", "dispatch": "False", "default": "True"} +Tensor & _new_zeros_with_same_feature_meta_out(const Tensor & self, const Tensor & other, int64_t self_num_batch_dims, Tensor & out); // {"schema": "aten::_new_zeros_with_same_feature_meta.out(Tensor self, Tensor other, *, int self_num_batch_dims=0, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _cudnn_ctc_loss_out(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool deterministic, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_cudnn_ctc_loss.out(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _cudnn_rnn_flatten_weight_out(TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, Tensor & out); // {"schema": "aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _cudnn_rnn_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const ::std::optional & weight_buf, const Tensor & hx, const ::std::optional & cx, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const ::std::optional & dropout_state, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::_cudnn_rnn.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"} +void _cudnn_rnn_backward_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const ::std::optional & cx, const Tensor & output, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const ::std::optional & dropout_state, const Tensor & reserve, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2, TensorList out3); // {"schema": "aten::_cudnn_rnn_backward.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? 
grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!)[] out3) -> ()", "dispatch": "True", "default": "True"} +Tensor & _cudnn_init_dropout_state_out(double dropout, bool train, int64_t dropout_seed, Tensor & out); // {"schema": "aten::_cudnn_init_dropout_state.out(float dropout, bool train, int dropout_seed, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _fused_dropout_out(const Tensor & self, double p, ::std::optional generator, Tensor & out0, Tensor & out1); // {"schema": "aten::_fused_dropout.out(Tensor self, float p, Generator? generator=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _masked_scale_out(const Tensor & self, const Tensor & mask, double scale, Tensor & out); // {"schema": "aten::_masked_scale.out(Tensor self, Tensor mask, float scale, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple native_dropout_out(const Tensor & input, double p, ::std::optional train, Tensor & out0, Tensor & out1); // {"schema": "aten::native_dropout.out(Tensor input, float p, bool? train, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & native_dropout_backward_out(const Tensor & grad_output, const Tensor & mask, double scale, Tensor & out); // {"schema": "aten::native_dropout_backward.out(Tensor grad_output, Tensor mask, float scale, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _conj_physical_out(const Tensor & self, Tensor & out); // {"schema": "aten::_conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _add_relu_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & add_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::add.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & affine_grid_generator_out(const Tensor & theta, c10::SymIntArrayRef size, bool align_corners, Tensor & out); // {"schema": "aten::affine_grid_generator.out(Tensor theta, SymInt[] size, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_functorch_fallback_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::_test_functorch_fallback.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bartlett_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::bartlett_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bartlett_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::bartlett_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantized_batch_norm_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & mean, const Tensor & var, double eps, double output_scale, int64_t output_zero_point, Tensor & out); // {"schema": "aten::quantized_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, const Tensor & p, ::std::optional generator, Tensor & out); // {"schema": "aten::bernoulli.Tensor_out(Tensor self, Tensor p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bernoulli(const Tensor & self, const Tensor & p, ::std::optional generator); // {"schema": "aten::bernoulli.Tensor(Tensor self, Tensor p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, double p, ::std::optional generator, Tensor & out); // {"schema": "aten::bernoulli.float_out(Tensor self, float p=0.5, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & binary_cross_entropy_with_logits_out(const Tensor & self, const Tensor & target, const ::std::optional & weight, const ::std::optional & pos_weight, int64_t reduction, Tensor & out); // {"schema": "aten::binary_cross_entropy_with_logits.out(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bincount_out(const Tensor & self, const ::std::optional & weights, int64_t minlength, Tensor & out); // {"schema": "aten::bincount.out(Tensor self, Tensor? weights=None, int minlength=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & blackman_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & blackman_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & block_diag_out(TensorList tensors, Tensor & out); // {"schema": "aten::block_diag.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & constant_pad_nd_out(const Tensor & self, c10::SymIntArrayRef pad, const Scalar & value, Tensor & out); // {"schema": "aten::constant_pad_nd.out(Tensor self, SymInt[] pad, Scalar value=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & convolution_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, Tensor & out); // {"schema": "aten::convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, OptionalSymIntArrayRef bias_sizes, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::convolution_backward.out(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & convolution_overrideable_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, Tensor & out); // {"schema": "aten::convolution_overrideable.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward_overrideable_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::convolution_backward_overrideable.out(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & _convolution_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, Tensor & out); // {"schema": "aten::_convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & conv_tbc_out(const Tensor & self, const Tensor & weight, const Tensor & bias, int64_t pad, Tensor & out); // {"schema": "aten::conv_tbc.out(Tensor self, Tensor weight, Tensor bias, int pad=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & copy_out(const Tensor & self, const Tensor & src, bool non_blocking, Tensor & out); // {"schema": "aten::copy.out(Tensor self, Tensor src, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _copy_from_out(const Tensor & self, const Tensor & dst, bool non_blocking, Tensor & out); // {"schema": "aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _copy_from_and_resize_out(const Tensor & self, const Tensor & dst, Tensor & out); // {"schema": "aten::_copy_from_and_resize.out(Tensor self, Tensor dst, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & count_nonzero_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::count_nonzero.dim_IntList_out(Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & count_nonzero_out(const Tensor & self, ::std::optional dim, Tensor & out); // {"schema": "aten::count_nonzero.out(Tensor self, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_affine_grid_generator_out(const Tensor & theta, int64_t N, int64_t C, int64_t H, int64_t W, Tensor & out); // {"schema": "aten::cudnn_affine_grid_generator.out(Tensor theta, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_affine_grid_generator_backward_out(const Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W, Tensor & out); // {"schema": "aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple cudnn_batch_norm_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::cudnn_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +::std::tuple cudnn_batch_norm_backward_out(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, const Tensor & reserveSpace, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::cudnn_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_transpose_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, Tensor & out); // {"schema": "aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mps_convolution_transpose_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple mps_convolution_transpose_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::mps_convolution_transpose_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_relu_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::cudnn_convolution_relu.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_add_relu_out(const Tensor & self, const Tensor & weight, const Tensor & z, const ::std::optional & alpha, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::cudnn_convolution_add_relu.out(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_grid_sampler_out(const Tensor & self, const Tensor & grid, Tensor & out); // {"schema": "aten::cudnn_grid_sampler.out(Tensor self, Tensor grid, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple cudnn_grid_sampler_backward_out(const Tensor & self, const Tensor & grid, const Tensor & grad_output, Tensor & out0, Tensor & out1); // {"schema": "aten::cudnn_grid_sampler_backward.out(Tensor self, Tensor grid, Tensor grad_output, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _ctc_loss_out(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_ctc_loss.out(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _ctc_loss_out(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_ctc_loss.Tensor_out(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, bool zero_infinity=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _ctc_loss_backward_out(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity, Tensor & out); // {"schema": "aten::_ctc_loss_backward.out(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diag_embed_out(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diag_embed.out(Tensor self, int offset=0, int dim1=-2, int dim2=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diagonal_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_backward.out(Tensor grad_output, SymInt[] input_sizes, int offset, int dim1, int dim2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::div.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Scalar & other, ::std::optional rounding_mode, Tensor & out); // {"schema": "aten::div.Scalar_mode_out(Tensor self, Scalar other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & embedding_out(const Tensor & weight, const Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse, Tensor & out); // {"schema": "aten::embedding.out(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & embedding_dense_backward_out(const Tensor & grad_output, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq, Tensor & out); // {"schema": "aten::embedding_dense_backward.out(Tensor grad_output, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & embedding_renorm_out(const Tensor & self, const Tensor & indices, double max_norm, double norm_type, Tensor & out); // {"schema": "aten::embedding_renorm.out(Tensor self, Tensor indices, float max_norm, float norm_type, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor embedding_renorm(const Tensor & self, const Tensor & indices, double max_norm, double norm_type); // {"schema": "aten::embedding_renorm(Tensor self, Tensor indices, float max_norm, float norm_type) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple _embedding_bag_forward_only_out(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::_embedding_bag_forward_only.out(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +::std::tuple _embedding_bag_out(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::_embedding_bag.out(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +Tensor & _embedding_bag_dense_backward_out(const Tensor & grad, const Tensor & indices, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, Tensor & out); // {"schema": "aten::_embedding_bag_dense_backward.out(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _embedding_bag_per_sample_weights_backward_out(const Tensor & grad, const Tensor & weight, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, int64_t mode, int64_t padding_idx, Tensor & out); // {"schema": "aten::_embedding_bag_per_sample_weights_backward.out(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_out(IntArrayRef size, ::std::optional names, ::std::optional memory_format, Tensor & out); // {"schema": "aten::empty.names_out(int[] size, *, Dimname[]? names, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_permuted_out(c10::SymIntArrayRef size, IntArrayRef physical_layout, Tensor & out); // {"schema": "aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_empty_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_empty.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_empty_strided_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::new_empty_strided.out(Tensor self, SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_full_out(const Tensor & self, c10::SymIntArrayRef size, const Scalar & fill_value, Tensor & out); // {"schema": "aten::new_full.out(Tensor self, SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_zeros_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_zeros.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_ones_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_ones.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _empty_affine_quantized_out(c10::SymIntArrayRef size, double scale, int64_t zero_point, ::std::optional memory_format, Tensor & out); // {"schema": "aten::_empty_affine_quantized.out(SymInt[] size, *, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _empty_per_channel_affine_quantized_out(c10::SymIntArrayRef size, const Tensor & scales, const Tensor & zero_points, int64_t axis, ::std::optional memory_format, Tensor & out); // {"schema": "aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_out(const Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format, const Tensor & out); // {"schema": "aten::resize.out(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize(const Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format); // {"schema": "aten::resize(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & _resize_output_out(const Tensor & self, c10::SymIntArrayRef size, Device device, const Tensor & out); // {"schema": "aten::_resize_output.out(Tensor self, SymInt[] size, Device device, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _resize_output(const Tensor & self, c10::SymIntArrayRef size, Device device); // {"schema": "aten::_resize_output(Tensor self, SymInt[] size, Device device) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & empty_quantized_out(IntArrayRef size, const Tensor & qtensor, ::std::optional memory_format, Tensor & out); // {"schema": "aten::empty_quantized.out(int[] size, Tensor qtensor, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_like_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::empty_like.out(Tensor self, *, MemoryFormat? 
memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_strided_out(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & fill_out(const Tensor & self, const Scalar & value, Tensor & out); // {"schema": "aten::fill.Scalar_out(Tensor self, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & fill_out(const Tensor & self, const Tensor & value, Tensor & out); // {"schema": "aten::fill.Tensor_out(Tensor self, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & floor_divide_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::floor_divide.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & full_out(IntArrayRef size, const Scalar & fill_value, ::std::optional names, Tensor & out); // {"schema": "aten::full.names_out(int[] size, Scalar fill_value, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & full_like_out(const Tensor & self, const Scalar & fill_value, ::std::optional memory_format, Tensor & out); // {"schema": "aten::full_like.out(Tensor self, Scalar fill_value, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & from_file_out(c10::string_view filename, ::std::optional shared, ::std::optional size, Tensor & out); // {"schema": "aten::from_file.out(str filename, bool? shared=None, int? size=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & grid_sampler_2d_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::grid_sampler_2d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple grid_sampler_2d_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::grid_sampler_2d_backward.out(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _grid_sampler_2d_cpu_fallback_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::_grid_sampler_2d_cpu_fallback.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & grid_sampler_3d_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::grid_sampler_3d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple grid_sampler_3d_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::grid_sampler_3d_backward.out(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & hann_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hann_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::hamming_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::hamming_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, double alpha, Tensor & out); // {"schema": "aten::hamming_window.periodic_alpha_out(int window_length, bool periodic, float alpha, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, double alpha, double beta, Tensor & out); // {"schema": "aten::hamming_window.periodic_alpha_beta_out(int window_length, bool periodic, float alpha, float beta, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, bool periodic, double beta, Tensor & out); // {"schema": "aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple native_group_norm_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple native_group_norm_backward_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & rstd, const ::std::optional & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_group_norm_backward.out(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, SymInt N, SymInt C, SymInt HxW, int group, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & index_put_out(const Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate, Tensor & out); // {"schema": "aten::index_put.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _index_put_impl_out(const Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate, bool unsafe, Tensor & out); // {"schema": "aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _index_put_impl(const Tensor & self, const c10::List<::std::optional> & indices, const Tensor & values, bool accumulate, bool unsafe); // {"schema": "aten::_index_put_impl(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isnan_out(const Tensor & self, Tensor & out); // {"schema": "aten::isnan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple native_layer_norm_out(const Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_layer_norm.out(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple native_layer_norm_backward_out(const Tensor & grad_out, const Tensor & input, c10::SymIntArrayRef normalized_shape, const Tensor & mean, const Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_layer_norm_backward.out(Tensor grad_out, Tensor input, SymInt[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple linear_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::linear_backward.out(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & mkldnn_linear_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, Tensor & out); // {"schema": "aten::mkldnn_linear.out(Tensor self, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_linear_backward_input_out(IntArrayRef input_size, const Tensor & grad_output, const Tensor & weight, Tensor & out); // {"schema": "aten::mkldnn_linear_backward_input.out(int[] input_size, Tensor grad_output, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_linear_backward_weights_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, bool bias_defined, Tensor & out0, Tensor & out1); // {"schema": "aten::mkldnn_linear_backward_weights.out(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_linear_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::mkldnn_linear_backward.out(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple matmul_backward_out(const Tensor & grad, const Tensor & self, const Tensor & other, ::std::array mask, Tensor & out0, Tensor & out1); // {"schema": "aten::matmul_backward.out(Tensor grad, Tensor self, Tensor other, bool[2] mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _aminmax_out(const Tensor & self, Tensor & out0, Tensor & out1); // {"schema": "aten::_aminmax.out(Tensor self, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _aminmax_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::_aminmax.dim_out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::max_pool2d_backward.out(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool2d_backward.out(Tensor grad_output, Tensor output, Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool3d_backward.out(Tensor grad_output, Tensor output, Tensor input, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantized_max_pool1d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool1d.out(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantized_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantized_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & median_out(const Tensor & self, Tensor & out); // {"schema": "aten::median.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & nanmedian_out(const Tensor & self, Tensor & out); // {"schema": "aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mps_convolution_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::_mps_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple mps_convolution_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::mps_convolution_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & mkldnn_convolution_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::mkldnn_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_rnn_layer_out(const Tensor & input, const Tensor & weight0, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & hx_, const Tensor & cx_, bool reverse, IntArrayRef batch_sizes, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::mkldnn_rnn_layer.out(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_rnn_layer_backward_out(const Tensor & input, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & weight4, const Tensor & hx_, const Tensor & cx_tmp, const Tensor & output, const Tensor & hy_, const Tensor & cy_, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, IntArrayRef batch_sizes, bool batch_first, const Tensor & workspace, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4, Tensor & out5, Tensor & out6); // {"schema": "aten::mkldnn_rnn_layer_backward.out(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4, Tensor(f!) out5, Tensor(g!) 
out6) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))", "dispatch": "True", "default": "True"} +::std::tuple miopen_batch_norm_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::miopen_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple miopen_batch_norm_backward_out(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & miopen_convolution_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & miopen_convolution_transpose_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_convolution_transpose.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & miopen_depthwise_convolution_out(const Tensor & self, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_depthwise_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple miopen_rnn_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & hx, const ::std::optional & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const ::std::optional & dropout_state, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::miopen_rnn.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"} +void miopen_rnn_backward_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const ::std::optional & cx, const Tensor & output, const ::std::optional & grad_output, const ::std::optional & grad_hy, const ::std::optional & grad_cy, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const ::std::optional & dropout_state, const Tensor & reserve, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2, TensorList out3); // {"schema": "aten::miopen_rnn_backward.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!)[] out3) -> ()", "dispatch": "True", "default": "True"} +Tensor & _sparse_sparse_matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::_sparse_sparse_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mul_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::mul.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _native_batch_norm_legit_functional(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & running_mean, const Tensor & running_var, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit_functional(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor running_mean_out, Tensor running_var_out)", "dispatch": "True", "default": "True"} +::std::tuple _native_batch_norm_legit_no_training_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & running_mean, const Tensor & running_var, double momentum, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_native_batch_norm_legit_no_training.out(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_stats_out(const Tensor & input, double eps, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_stats.out(Tensor input, float eps, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_gather_stats_out(const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps, int64_t count, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_gather_stats.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_gather_stats_with_counts_out(const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps, const Tensor & counts, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_gather_stats_with_counts.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple native_batch_norm_backward_out(const Tensor & grad_out, const Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_batch_norm_backward.out(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_backward_reduce_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & weight, bool input_g, bool weight_g, bool bias_g, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::batch_norm_backward_reduce.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +Tensor & batch_norm_backward_elemt_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const ::std::optional & weight, const Tensor & sum_dy, const Tensor & sum_dy_xmu, const Tensor & count, Tensor & out); // {"schema": "aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple batch_norm_update_stats_out(const Tensor & input, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_update_stats.out(Tensor input, Tensor? running_mean, Tensor? running_var, float momentum, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _nnpack_spatial_convolution_out(const Tensor & input, const Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::_nnpack_spatial_convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ones_out(IntArrayRef size, ::std::optional names, Tensor & out); // {"schema": "aten::ones.names_out(int[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ones_like_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::ones_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _euclidean_dist_out(const Tensor & x1, const Tensor & x2, Tensor & out); // {"schema": "aten::_euclidean_dist.out(Tensor x1, Tensor x2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _cdist_forward_out(const Tensor & x1, const Tensor & x2, double p, ::std::optional compute_mode, Tensor & out); // {"schema": "aten::_cdist_forward.out(Tensor x1, Tensor x2, float p, int? compute_mode, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _cdist_backward_out(const Tensor & grad, const Tensor & x1, const Tensor & x2, double p, const Tensor & cdist, Tensor & out); // {"schema": "aten::_cdist_backward.out(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _pdist_forward_out(const Tensor & self, double p, Tensor & out); // {"schema": "aten::_pdist_forward.out(Tensor self, float p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _pdist_backward_out(const Tensor & grad, const Tensor & self, double p, const Tensor & pdist, Tensor & out); // {"schema": "aten::_pdist_backward.out(Tensor grad, Tensor self, float p, Tensor pdist, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & pixel_shuffle_out(const Tensor & self, int64_t upscale_factor, Tensor & out); // {"schema": "aten::pixel_shuffle.out(Tensor self, int upscale_factor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & pixel_unshuffle_out(const Tensor & self, int64_t downscale_factor, Tensor & out); // {"schema": "aten::pixel_unshuffle.out(Tensor self, int downscale_factor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & channel_shuffle_out(const Tensor & self, c10::SymInt groups, Tensor & out); // {"schema": "aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _pin_memory_out(const Tensor & self, ::std::optional device, Tensor & out); // {"schema": "aten::_pin_memory.out(Tensor self, Device? device=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scalar_tensor_out(const Scalar & s, Tensor & out); // {"schema": "aten::scalar_tensor.out(Scalar s, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, ::std::optional names, Tensor & out); // {"schema": "aten::rand.names_out(SymInt[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, Tensor & out); // {"schema": "aten::rand.generator_with_names_out(SymInt[] size, *, Generator? generator, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rand_like_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::rand_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_like_out(const Tensor & self, c10::SymInt high, ::std::optional memory_format, Tensor & out); // {"schema": "aten::randint_like.out(Tensor self, SymInt high, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_like_out(const Tensor & self, c10::SymInt low, c10::SymInt high, ::std::optional memory_format, Tensor & out); // {"schema": "aten::randint_like.low_dtype_out(Tensor self, SymInt low, SymInt high, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, ::std::optional names, Tensor & out); // {"schema": "aten::randn.names_out(SymInt[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, ::std::optional generator, ::std::optional names, Tensor & out); // {"schema": "aten::randn.generator_with_names_out(SymInt[] size, *, Generator? generator, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randn_like_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::randn_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & repeat_out(const Tensor & self, c10::SymIntArrayRef repeats, Tensor & out); // {"schema": "aten::repeat.out(Tensor self, SymInt[] repeats, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & repeat_interleave_out(const Tensor & repeats, ::std::optional output_size, Tensor & out); // {"schema": "aten::repeat_interleave.Tensor_out(Tensor repeats, *, SymInt? output_size=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mkldnn_reshape_out(const Tensor & self, IntArrayRef shape, Tensor & out); // {"schema": "aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & relu_out(const Tensor & self, Tensor & out); // {"schema": "aten::relu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & select_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_backward.out(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & celu_out(const Tensor & self, const Scalar & alpha, Tensor & out); // {"schema": "aten::celu.out(Tensor self, Scalar alpha=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slice_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_backward.out(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slice_scatter_out(const Tensor & self, const Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_scatter.out(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & select_scatter_out(const Tensor & self, const Tensor & src, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_scatter.out(Tensor self, Tensor src, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diagonal_scatter_out(const Tensor & self, const Tensor & src, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_scatter.out(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & as_strided_scatter_out(const Tensor & self, const Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset, Tensor & out); // {"schema": "aten::as_strided_scatter.out(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void unsafe_split_out(const Tensor & self, c10::SymInt split_size, int64_t dim, TensorList out); // {"schema": "aten::unsafe_split.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void unsafe_split_with_sizes_out(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim, TensorList out); // {"schema": "aten::unsafe_split_with_sizes.out(Tensor self, SymInt[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & sum_out(const Tensor & self, ::std::optional dtype, Tensor & out); // {"schema": "aten::sum.out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple std_mean_out(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::std_mean.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & prod_out(const Tensor & self, ::std::optional dtype, Tensor & out); // {"schema": "aten::prod.out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mkldnn_transpose_out(const Tensor & self, int64_t dim0, int64_t dim1, Tensor & out); // {"schema": "aten::_mkldnn_transpose.out(Tensor self, int dim0, int dim1, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & flip_out(const Tensor & self, IntArrayRef dims, Tensor & out); // {"schema": "aten::flip.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & roll_out(const Tensor & self, c10::SymIntArrayRef shifts, IntArrayRef dims, Tensor & out); // {"schema": "aten::roll.out(Tensor self, SymInt[1] shifts, int[1] dims=[], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rot90_out(const Tensor & self, int64_t k, IntArrayRef dims, Tensor & out); // {"schema": "aten::rot90.out(Tensor self, int k=1, int[] dims=[0,1], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _transform_bias_rescale_qkv_out(const Tensor & qkv, const Tensor & qkv_bias, int64_t num_heads, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_transform_bias_rescale_qkv.out(Tensor qkv, Tensor qkv_bias, int num_heads, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_from_mask_out(const Tensor & t, const Tensor & mask, bool mask_check, Tensor & out); // {"schema": "aten::_nested_tensor_from_mask.out(Tensor t, Tensor mask, bool mask_check=True, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_from_padded_out(const Tensor & padded, const Tensor & cpu_nested_shape_example, bool fuse_transform_0213, Tensor & out); // {"schema": "aten::_nested_from_padded.out(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_size_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_size.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_strides_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_strides.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_storage_offsets_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_storage_offsets.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_from_padded_and_nested_example_out(const Tensor & padded, const Tensor & nt_example, Tensor & out); // {"schema": "aten::_nested_from_padded_and_nested_example.out(Tensor padded, Tensor nt_example, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_view_from_buffer_copy_out(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets, Tensor & out); // {"schema": "aten::_nested_view_from_buffer_copy.out(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_view_from_jagged_copy_out(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen, Tensor & out); // {"schema": "aten::_nested_view_from_jagged_copy.out(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_get_values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_get_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _trilinear_out(const Tensor & i1, const Tensor & i2, const Tensor & i3, IntArrayRef expand1, IntArrayRef expand2, IntArrayRef expand3, IntArrayRef sumdim, int64_t unroll_dim, Tensor & out); // {"schema": "aten::_trilinear.out(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _unique_out(const Tensor & self, bool sorted, bool return_inverse, Tensor & out0, Tensor & out1); // {"schema": "aten::_unique.out(Tensor self, bool sorted=True, bool return_inverse=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple unique_dim_out(const Tensor & self, int64_t dim, bool sorted, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_dim.out(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple unique_consecutive_out(const Tensor & self, bool return_inverse, bool return_counts, ::std::optional dim, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_consecutive.out(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple unique_dim_consecutive_out(const Tensor & self, int64_t dim, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_dim_consecutive.out(Tensor self, int dim, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple _unique2_out(const Tensor & self, bool sorted, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & _unsafe_view_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple var_mean_out(const Tensor & self, OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::var_mean.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _weight_norm_interface_out(const Tensor & v, const Tensor & g, int64_t dim, Tensor & out0, Tensor & out1); // {"schema": "aten::_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _weight_norm_interface_backward_out(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim, Tensor & out0, Tensor & out1); // {"schema": "aten::_weight_norm_interface_backward.out(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & zeros_out(IntArrayRef size, ::std::optional names, Tensor & out); // {"schema": "aten::zeros.names_out(int[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _efficientzerotensor_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::_efficientzerotensor.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & zeros_like_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::zeros_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _standard_gamma_grad_out(const Tensor & self, const Tensor & output, Tensor & out); // {"schema": "aten::_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _standard_gamma_out(const Tensor & self, ::std::optional generator, Tensor & out); // {"schema": "aten::_standard_gamma.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _dirichlet_grad_out(const Tensor & x, const Tensor & alpha, const Tensor & total, Tensor & out); // {"schema": "aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sample_dirichlet_out(const Tensor & self, ::std::optional generator, Tensor & out); // {"schema": "aten::_sample_dirichlet.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & poisson_out(const Tensor & self, ::std::optional generator, Tensor & out); // {"schema": "aten::poisson.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & binomial_out(const Tensor & count, const Tensor & prob, ::std::optional generator, Tensor & out); // {"schema": "aten::binomial.out(Tensor count, Tensor prob, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & native_norm_out(const Tensor & self, const Scalar & p, Tensor & out); // {"schema": "aten::native_norm.out(Tensor self, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & native_norm_out(const Tensor & self, const ::std::optional & p, IntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::native_norm.ScalarOpt_dim_dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _batch_norm_with_update_functional(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const Tensor & running_mean, const Tensor & running_var, double momentum, double eps); // {"schema": "aten::_batch_norm_with_update_functional(Tensor input, Tensor? 
weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor, Tensor running_mean_out, Tensor running_var_out)", "dispatch": "True", "default": "True"} +::std::tuple _batch_norm_no_update_out(const Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, double momentum, double eps, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::_batch_norm_no_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +Tensor & _sparse_sum_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::_sparse_sum.dim_out(Tensor self, int[1] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_sum_backward_out(const Tensor & grad, const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::_sparse_sum_backward.out(Tensor grad, Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_csr_sum_out(const Tensor & self, IntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_csr_prod_out(const Tensor & self, IntArrayRef dim, bool keepdim, ::std::optional dtype, Tensor & out); // {"schema": "aten::_sparse_csr_prod.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_sparse_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self, Tensor & out); // {"schema": "aten::_sparse_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_log_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_sparse_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_log_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self, Tensor & out); // {"schema": "aten::_sparse_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _spdiags_out(const Tensor & diagonals, const Tensor & offsets, IntArrayRef shape, ::std::optional layout, Tensor & out); // {"schema": "aten::_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const ::std::optional & p, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.ScalarOpt_dtype_out(Tensor self, Scalar? p, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const Scalar & p, Tensor & out); // {"schema": "aten::norm.Scalar_out(Tensor self, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clone_out(const Tensor & self, ::std::optional memory_format, Tensor & out); // {"schema": "aten::clone.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_as_out(const Tensor & self, const Tensor & the_template, ::std::optional memory_format, const Tensor & out); // {"schema": "aten::resize_as.out(Tensor self, Tensor the_template, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize_as(const Tensor & self, const Tensor & the_template, ::std::optional memory_format); // {"schema": "aten::resize_as(Tensor self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & resize_as_sparse_out(const Tensor & self, const Tensor & the_template, const Tensor & out); // {"schema": "aten::resize_as_sparse.out(Tensor self, Tensor the_template, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize_as_sparse(const Tensor & self, const Tensor & the_template); // {"schema": "aten::resize_as_sparse(Tensor self, Tensor the_template) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & zero_out(const Tensor & self, Tensor & out); // {"schema": "aten::zero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor zero(const Tensor & self); // {"schema": "aten::zero(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::sub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsub_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::rsub.Tensor_out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsub_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::rsub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::_sparse_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sparse_coo_tensor_out(IntArrayRef size, Tensor & out); // {"schema": "aten::sparse_coo_tensor.size_out(int[] size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_coo_tensor_with_dims_out(int64_t sparse_dim, int64_t dense_dim, IntArrayRef size, Tensor & out); // {"schema": "aten::_sparse_coo_tensor_with_dims.out(int sparse_dim, int dense_dim, int[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const Tensor & indices, const Tensor & values, ::std::optional is_coalesced, Tensor & out); // {"schema": "aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & sparse_resize_out(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim, const Tensor & out); // {"schema": "aten::sparse_resize.out(Tensor self, int[] size, int sparse_dim, int dense_dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sparse_resize(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize(Tensor self, int[] size, int sparse_dim, int dense_dim) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & sparse_resize_and_clear_out(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim, const Tensor & out); // {"schema": "aten::sparse_resize_and_clear.out(Tensor self, int[] size, int sparse_dim, int dense_dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sparse_resize_and_clear(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_and_clear(Tensor self, int[] size, int sparse_dim, int dense_dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sparse_mask_out(const Tensor & self, const Tensor & mask, Tensor & out); // {"schema": "aten::sparse_mask.out(Tensor self, Tensor mask, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_mask_projection_out(const Tensor & self, const Tensor & mask, bool accumulate_matches, Tensor & out); // {"schema": "aten::_sparse_mask_projection.out(Tensor self, Tensor mask, bool accumulate_matches=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_dense_out(const Tensor & self, ::std::optional dtype, ::std::optional masked_grad, Tensor & out); // {"schema": "aten::_to_dense.out(Tensor self, ScalarType? dtype=None, bool? masked_grad=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _coalesce_out(const Tensor & self, Tensor & out); // {"schema": "aten::_coalesce.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _coalesced_out(const Tensor & self, bool coalesced, Tensor & out); // {"schema": "aten::_coalesced.out(Tensor self, bool coalesced, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _coalesced(const Tensor & self, bool coalesced); // {"schema": "aten::_coalesced(Tensor self, bool coalesced) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copy_sparse_to_sparse_out(const Tensor & self, const Tensor & src, bool non_blocking, Tensor & out); // {"schema": "aten::copy_sparse_to_sparse.out(Tensor self, Tensor src, bool non_blocking=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor copy_sparse_to_sparse(const Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_sparse_to_sparse(Tensor self, Tensor src, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_out(const Tensor & self, int64_t sparse_dim, Tensor & out); // {"schema": "aten::_to_sparse.sparse_dim_out(Tensor self, int sparse_dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_out(const Tensor & self, ::std::optional layout, OptionalIntArrayRef blocksize, ::std::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse.out(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_csr_out(const Tensor & self, ::std::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_csc_out(const Tensor & self, ::std::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_bsr_out(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_bsr.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_bsc_out(const Tensor & self, IntArrayRef blocksize, ::std::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & to_mkldnn_out(const Tensor & self, ::std::optional dtype, Tensor & out); // {"schema": "aten::to_mkldnn.out(Tensor self, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_reorder_conv2d_weight_out(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size, Tensor & out); // {"schema": "aten::mkldnn_reorder_conv2d_weight.out(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_reorder_conv3d_weight_out(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size, Tensor & out); // {"schema": "aten::mkldnn_reorder_conv3d_weight.out(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_dynamic_out(const Tensor & self, ScalarType dtype, bool reduce_range, Tensor & out); // {"schema": "aten::quantize_per_tensor_dynamic.out(Tensor self, ScalarType dtype, bool reduce_range, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_out(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_tensor.out(Tensor self, float scale, int zero_point, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_tensor.tensor_qparams_out(Tensor self, Tensor scale, Tensor zero_point, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void quantize_per_tensor_out(TensorList tensors, const Tensor & scales, const Tensor & zero_points, ScalarType dtype, TensorList out); // {"schema": "aten::quantize_per_tensor.tensors_out(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & quantize_per_channel_out(const Tensor & self, const Tensor & scales, const Tensor & zero_points, int64_t axis, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_channel.out(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & dequantize_out(const Tensor & self, Tensor & out); // {"schema": "aten::dequantize.self_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void dequantize_out(TensorList tensors, TensorList out); // {"schema": "aten::dequantize.tensors_out(Tensor[] tensors, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & q_per_channel_scales_out(const Tensor & self, Tensor & out); // {"schema": "aten::q_per_channel_scales.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & q_per_channel_zero_points_out(const Tensor & self, Tensor & out); // {"schema": "aten::q_per_channel_zero_points.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & int_repr_out(const Tensor & self, Tensor & out); // {"schema": "aten::int_repr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_per_tensor_quantized_tensor_out(const Tensor & self, double scale, int64_t zero_point, Tensor & out); // {"schema": "aten::_make_per_tensor_quantized_tensor.out(Tensor self, float scale, int zero_point, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_per_channel_quantized_tensor_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, Tensor & out); // {"schema": "aten::_make_per_channel_quantized_tensor.out(Tensor self, Tensor scale, Tensor zero_point, int axis, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple fake_quantize_per_tensor_affine_cachemask_out(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask.out(Tensor self, float scale, int zero_point, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _fake_quantize_per_tensor_affine_cachemask_tensor_qparams_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, const Tensor & fake_quant_enabled, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _fake_quantize_learnable_per_tensor_affine_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor, Tensor & out); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine.out(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple fake_quantize_per_channel_affine_cachemask_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _fake_quantize_learnable_per_channel_affine_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, Tensor & out); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _fused_moving_avg_obs_fq_helper_out(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant, Tensor & out0, Tensor & out1); // {"schema": "aten::_fused_moving_avg_obs_fq_helper.out(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False, *, Tensor(e!) out0, Tensor(f!) 
out1) -> (Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "True"} +::std::tuple _fused_moving_avg_obs_fq_helper_functional(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, const Tensor & running_min, const Tensor & running_max, const Tensor & scale, const Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::_fused_moving_avg_obs_fq_helper_functional(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor running_min, Tensor running_max, Tensor scale, Tensor zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> (Tensor output, Tensor mask, Tensor running_min_out, Tensor running_max_out, Tensor scale_out, Tensor zero_point_out)", "dispatch": "True", "default": "True"} +Tensor & _to_copy_out(const Tensor & self, bool non_blocking, ::std::optional memory_format, Tensor & out); // {"schema": "aten::_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _lstm_mps_out(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4, Tensor & out5); // {"schema": "aten::_lstm_mps.out(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4, Tensor(f!) out5) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "True"} +void lstm_mps_backward_out(const ::std::optional & grad_y, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & z_state, const Tensor & cell_state_fwd, const Tensor & input, const Tensor & layersOutputs, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, Tensor & out0, TensorList out1, TensorList out2); // {"schema": "aten::lstm_mps_backward.out(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!)[] out1, Tensor(c!)[] out2) -> ()", "dispatch": "True", "default": "True"} +::std::tuple _thnn_fused_lstm_cell_out(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple _thnn_fused_lstm_cell_backward_impl_out(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_thnn_fused_lstm_cell_backward_impl.out(Tensor? 
grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple _thnn_fused_gru_cell_out(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, Tensor & out0, Tensor & out1); // {"schema": "aten::_thnn_fused_gru_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _thnn_fused_gru_cell_backward_out(const Tensor & grad_hy, const Tensor & workspace, bool has_bias, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::_thnn_fused_gru_cell_backward.out(Tensor grad_hy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"} +::std::tuple _pack_padded_sequence_out(const Tensor & input, const Tensor & lengths, bool batch_first, Tensor & out0, Tensor & out1); // {"schema": "aten::_pack_padded_sequence.out(Tensor input, Tensor lengths, bool batch_first, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, Storage source, Tensor & out); // {"schema": "aten::set.source_Storage_out(Tensor self, Storage source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, Storage source); // {"schema": "aten::set.source_Storage(Tensor self, Storage source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::set.source_Storage_storage_offset_out(Tensor self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set.source_Storage_storage_offset(Tensor self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, const Tensor & source, Tensor & out); // {"schema": "aten::set.source_Tensor_out(Tensor self, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, const Tensor & source); // {"schema": "aten::set.source_Tensor(Tensor self, Tensor source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, Tensor & out); // {"schema": "aten::set.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self); // {"schema": "aten::set(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lift_out(const Tensor & self, Tensor & out); // {"schema": "aten::lift.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lift_fresh_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::lift_fresh_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_fill_out(const Tensor & self, const Tensor & mask, const Scalar & value, Tensor & out); // {"schema": "aten::masked_fill.Scalar_out(Tensor self, Tensor mask, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_fill_out(const Tensor & self, const Tensor & mask, const Tensor & value, Tensor & out); // {"schema": "aten::masked_fill.Tensor_out(Tensor self, Tensor mask, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_scatter_out(const Tensor & self, const Tensor & mask, const Tensor & source, Tensor & out); // {"schema": "aten::masked_scatter.out(Tensor self, Tensor mask, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _masked_softmax_out(const Tensor & self, const Tensor & mask, ::std::optional dim, ::std::optional mask_type, Tensor & out); // {"schema": "aten::_masked_softmax.out(Tensor self, Tensor mask, int? dim=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _masked_softmax_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & mask, ::std::optional dim, Tensor & out); // {"schema": "aten::_masked_softmax_backward.out(Tensor grad_output, Tensor output, Tensor mask, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & put_out(const Tensor & self, const Tensor & index, const Tensor & source, bool accumulate, Tensor & out); // {"schema": "aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & index_fill_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, Tensor & out); // {"schema": "aten::index_fill.int_Scalar_out(Tensor self, int dim, Tensor index, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & index_fill_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & value, Tensor & out); // {"schema": "aten::index_fill.int_Tensor_out(Tensor self, int dim, Tensor index, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_and.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_or.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_xor.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __lshift___out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::__lshift__.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __lshift___out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::__lshift__.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __rshift___out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::__rshift__.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __rshift___out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::__rshift__.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & random_out(const Tensor & self, int64_t from, ::std::optional to, ::std::optional generator, Tensor & out); // {"schema": "aten::random.from_out(Tensor self, int from, int? to, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor random(const Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); // {"schema": "aten::random.from(Tensor self, int from, int? to, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & random_out(const Tensor & self, int64_t to, ::std::optional generator, Tensor & out); // {"schema": "aten::random.to_out(Tensor self, int to, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor random(const Tensor & self, int64_t to, ::std::optional generator); // {"schema": "aten::random.to(Tensor self, int to, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & random_out(const Tensor & self, ::std::optional generator, Tensor & out); // {"schema": "aten::random.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor random(const Tensor & self, ::std::optional generator); // {"schema": "aten::random(Tensor self, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & uniform_out(const Tensor & self, double from, double to, ::std::optional generator, Tensor & out); // {"schema": "aten::uniform.out(Tensor self, float from=0, float to=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor uniform(const Tensor & self, double from, double to, ::std::optional generator); // {"schema": "aten::uniform(Tensor self, float from=0, float to=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cauchy_out(const Tensor & self, double median, double sigma, ::std::optional generator, Tensor & out); // {"schema": "aten::cauchy.out(Tensor self, float median=0, float sigma=1, *, Generator? generator=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor cauchy(const Tensor & self, double median, double sigma, ::std::optional generator); // {"schema": "aten::cauchy(Tensor self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & log_normal_out(const Tensor & self, double mean, double std, ::std::optional generator, Tensor & out); // {"schema": "aten::log_normal.out(Tensor self, float mean=1, float std=2, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor log_normal(const Tensor & self, double mean, double std, ::std::optional generator); // {"schema": "aten::log_normal(Tensor self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & exponential_out(const Tensor & self, double lambd, ::std::optional generator, Tensor & out); // {"schema": "aten::exponential.out(Tensor self, float lambd=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor exponential(const Tensor & self, double lambd, ::std::optional generator); // {"schema": "aten::exponential(Tensor self, float lambd=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & geometric_out(const Tensor & self, double p, ::std::optional generator, Tensor & out); // {"schema": "aten::geometric.out(Tensor self, float p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor geometric(const Tensor & self, double p, ::std::optional generator); // {"schema": "aten::geometric(Tensor self, float p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tril_indices_out(int64_t row, int64_t col, int64_t offset, Tensor & out); // {"schema": "aten::tril_indices.out(int row, int col, int offset=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & triu_indices_out(int64_t row, int64_t col, int64_t offset, Tensor & out); // {"schema": "aten::triu_indices.out(int row, int col, int offset=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & trace_out(const Tensor & self, Tensor & out); // {"schema": "aten::trace.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _cholesky_solve_helper_out(const Tensor & self, const Tensor & A, bool upper, Tensor & out); // {"schema": "aten::_cholesky_solve_helper.out(Tensor self, Tensor A, bool upper, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & dist_out(const Tensor & self, const Tensor & other, const Scalar & p, Tensor & out); // {"schema": "aten::dist.out(Tensor self, Tensor other, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void _histogramdd_bin_edges_out(const Tensor & self, IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density, TensorList out); // {"schema": "aten::_histogramdd_bin_edges.out(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & _histogramdd_from_bin_cts_out(const Tensor & self, IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density, Tensor & out); // {"schema": "aten::_histogramdd_from_bin_cts.out(Tensor self, int[] bins, *, float[]? 
range=None, Tensor? weight=None, bool density=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _histogramdd_from_bin_tensors_out(const Tensor & self, TensorList bins, const ::std::optional & weight, bool density, Tensor & out); // {"schema": "aten::_histogramdd_from_bin_tensors.out(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & remainder_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::remainder.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & unfold_backward_out(const Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, Tensor & out); // {"schema": "aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & normal_out(const Tensor & self, double mean, double std, ::std::optional generator, Tensor & out); // {"schema": "aten::normal.out(Tensor self, float mean=0, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void _amp_foreach_non_finite_check_and_unscale_out(TensorList self, Tensor & found_inf, const Tensor & inv_scale, TensorList out); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale.out(Tensor[] self, Tensor(b!) found_inf, Tensor inv_scale, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,Tensor> _amp_foreach_non_finite_check_and_unscale(TensorList self, const Tensor & found_inf, const Tensor & inv_scale); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale(Tensor[] self, Tensor found_inf, Tensor inv_scale) -> (Tensor[] self_out, Tensor found_inf_out)", "dispatch": "True", "default": "True"} +Tensor & _amp_update_scale_out(const Tensor & self, Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor & out); // {"schema": "aten::_amp_update_scale.out(Tensor self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _amp_update_scale(const Tensor & self, const Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); // {"schema": "aten::_amp_update_scale(Tensor self, Tensor growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> (Tensor, Tensor growth_tracker_out)", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_add.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, TensorList other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_add.List_out(Tensor[] self, Tensor[] other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_add.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, const Tensor & other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_add.Tensor_out(Tensor[] self, Tensor other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_sub.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, TensorList other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_sub.List_out(Tensor[] self, Tensor[] other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_sub.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_mul.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_mul.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_mul.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, const Tensor & other, TensorList out); // {"schema": "aten::_foreach_mul.Tensor_out(Tensor[] self, Tensor other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_div_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_div.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_div_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_div.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_div_out(TensorList self, ArrayRef scalars, 
TensorList out); // {"schema": "aten::_foreach_div.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_div_out(TensorList self, const Tensor & other, TensorList out); // {"schema": "aten::_foreach_div.Tensor_out(Tensor[] self, Tensor other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_clamp_max.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_clamp_max.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_max_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_clamp_max.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_clamp_min.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_clamp_min.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_clamp_min_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_clamp_min.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_maximum_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_maximum.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_maximum_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_maximum.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_maximum_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_maximum.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_minimum.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_minimum.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_minimum.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value, TensorList out); // {"schema": "aten::_foreach_addcdiv.Scalar_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList 
tensor2, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_addcdiv.ScalarList_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars, TensorList out); // {"schema": "aten::_foreach_addcdiv.Tensor_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value, TensorList out); // {"schema": "aten::_foreach_addcmul.Scalar_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_addcmul.ScalarList_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars, TensorList out); // {"schema": "aten::_foreach_addcmul.Tensor_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_abs_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_abs.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_acos_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_acos.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_asin_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_asin.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_atan_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_atan.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_ceil_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_ceil.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_cos_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_cos.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_cosh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_cosh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_erf_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_erf.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_erfc_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_erfc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_exp_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_exp.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_expm1_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_expm1.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_floor_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_floor.out(Tensor[] 
self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_frac_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_frac.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lerp_out(TensorList self, TensorList tensors1, TensorList weights, TensorList out); // {"schema": "aten::_foreach_lerp.List_out(Tensor[] self, Tensor[] tensors1, Tensor[] weights, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lerp_out(TensorList self, TensorList tensors1, const Scalar & weight, TensorList out); // {"schema": "aten::_foreach_lerp.Scalar_out(Tensor[] self, Tensor[] tensors1, Scalar weight, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lgamma_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_lgamma.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log10_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log10.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log1p_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log1p.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log2_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log2.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_max_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_max.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_neg_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_neg.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_norm_out(TensorList self, const Scalar & ord, ::std::optional dtype, TensorList out); // {"schema": "aten::_foreach_norm.Scalar_out(Tensor[] self, Scalar ord=2, ScalarType? 
dtype=None, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, TensorList exponent, TensorList out); // {"schema": "aten::_foreach_pow.List_out(Tensor[] self, Tensor[] exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, const Scalar & exponent, TensorList out); // {"schema": "aten::_foreach_pow.Scalar_out(Tensor[] self, Scalar exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, ArrayRef exponent, TensorList out); // {"schema": "aten::_foreach_pow.ScalarList_out(Tensor[] self, Scalar[] exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_reciprocal_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_reciprocal.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_round_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_round.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sigmoid_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sigmoid.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sign_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sign.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sin_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sin.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sinh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sinh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sqrt_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sqrt.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_tan_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_tan.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_tanh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_tanh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_trunc_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_trunc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_zero_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_zero.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_zero(TensorList self); // {"schema": "aten::_foreach_zero(Tensor[] self) -> Tensor[] self_out", "dispatch": "True", "default": "True"} +void _foreach_copy_out(TensorList self, TensorList src, bool non_blocking, TensorList out); // {"schema": "aten::_foreach_copy.out(Tensor[] self, Tensor[] src, bool non_blocking=False, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & bucketize_out(const Scalar & self, const Tensor & boundaries, bool out_int32, bool right, Tensor & out); // {"schema": "aten::bucketize.Scalar_out(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_jvp_out(const Tensor & glu, const Tensor & x, const Tensor & dx, int64_t dim, Tensor & out); // {"schema": "aten::glu_jvp.out(Tensor glu, Tensor x, Tensor dx, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_backward_jvp_out(const Tensor & grad_x, const Tensor & grad_glu, const Tensor & x, const Tensor & dgrad_glu, const Tensor & dx, int64_t dim, Tensor & out); // {"schema": "aten::glu_backward_jvp.out(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hardswish_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::hardswish_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rrelu_with_noise_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, bool self_is_result, Tensor & out); // {"schema": "aten::rrelu_with_noise_backward.out(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_adaptive_avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::mkldnn_adaptive_avg_pool2d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool2d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::_adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::_adaptive_avg_pool2d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool3d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::_adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::_adaptive_avg_pool3d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _slow_conv2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_slow_conv2d_backward.output_mask_out(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & conv_depthwise3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::conv_depthwise3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slow_conv_dilated2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_dilated2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slow_conv_dilated3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_dilated3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & isinf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & linalg_matrix_exp_out(const Tensor & self, Tensor & out); // {"schema": "aten::linalg_matrix_exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_intlist_out(const Tensor & values, OptionalIntArrayRef addends, Tensor & out); // {"schema": "aten::_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_filled_intlist_out(const Tensor & values, OptionalIntArrayRef addends, Tensor & out); // {"schema": "aten::_test_optional_filled_intlist.out(Tensor values, int[2]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_floatlist_out(const Tensor & values, ::std::optional> addends, Tensor & out); // {"schema": "aten::_test_optional_floatlist.out(Tensor values, float[]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_warn_in_autograd_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_warn_in_autograd.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_autograd_multiple_dispatch_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_autograd_multiple_dispatch.fullcoverage_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_autograd_multiple_dispatch_view_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_autograd_multiple_dispatch_view_copy.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & segment_reduce_out(const Tensor & data, c10::string_view reduce, const ::std::optional & lengths, const ::std::optional & indices, const ::std::optional & offsets, int64_t axis, bool unsafe, const ::std::optional & initial, Tensor & out); // {"schema": "aten::segment_reduce.out(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, Tensor? offsets=None, int axis=0, bool unsafe=False, Scalar? initial=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _segment_reduce_backward_out(const Tensor & grad, const Tensor & output, const Tensor & data, c10::string_view reduce, const ::std::optional & lengths, const ::std::optional & offsets, int64_t axis, const ::std::optional & initial, Tensor & out); // {"schema": "aten::_segment_reduce_backward.out(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_from_tensor_list_out(TensorList list, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, Tensor & out); // {"schema": "aten::_nested_tensor_from_tensor_list.out(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _fw_primal_copy_out(const Tensor & self, int64_t level, Tensor & out); // {"schema": "aten::_fw_primal_copy.out(Tensor self, int level, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_dual_copy_out(const Tensor & primal, const Tensor & tangent, int64_t level, Tensor & out); // {"schema": "aten::_make_dual_copy.out(Tensor primal, Tensor tangent, int level, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_as_real_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::view_as_real_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_as_complex_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::view_as_complex_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _conj_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_conj_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _neg_view_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_neg_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & as_strided_copy_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset, Tensor & out); // {"schema": "aten::as_strided_copy.out(Tensor self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_broadcast_to_copy_out(const Tensor & self, IntArrayRef size, Tensor & out); // {"schema": "aten::_sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diagonal_copy_out(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & expand_copy_out(const Tensor & self, c10::SymIntArrayRef size, bool implicit, Tensor & out); // {"schema": "aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & permute_copy_out(const Tensor & self, IntArrayRef dims, Tensor & out); // {"schema": "aten::permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _reshape_alias_copy_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::_reshape_alias_copy.out(Tensor self, SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & select_copy_out(const Tensor & self, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_copy.int_out(Tensor self, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & detach_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::detach_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slice_copy_out(const Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_copy.Tensor_out(Tensor self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::squeeze_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::squeeze_copy.dim_out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::squeeze_copy.dims_out(Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & t_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::t_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & transpose_copy_out(const Tensor & self, int64_t dim0, int64_t dim1, Tensor & out); // {"schema": "aten::transpose_copy.int_out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & unsqueeze_copy_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::values_copy.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & crow_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::crow_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & col_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::col_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ccol_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::ccol_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & row_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::row_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_copy_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::view_copy.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_copy_out(const Tensor & self, ScalarType dtype, Tensor & out); // {"schema": "aten::view_copy.dtype_out(Tensor self, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & unfold_copy_out(const Tensor & self, int64_t dimension, int64_t size, int64_t step, Tensor & out); // {"schema": "aten::unfold_copy.out(Tensor self, int dimension, int size, int step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & alias_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & to_padded_tensor_out(const Tensor & self, double padding, OptionalSymIntArrayRef output_size, Tensor & out); // {"schema": "aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _transformer_encoder_layer_fwd_out(const Tensor & src, int64_t embed_dim, int64_t num_heads, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const Tensor & norm_weight_1, const Tensor & norm_bias_1, const Tensor & norm_weight_2, const Tensor & norm_bias_2, const Tensor & ffn_weight_1, const Tensor & ffn_bias_1, const Tensor & ffn_weight_2, const Tensor & ffn_bias_2, const ::std::optional & mask, ::std::optional mask_type, Tensor & out); // {"schema": "aten::_transformer_encoder_layer_fwd.out(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _native_multi_head_attention_out(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const ::std::optional & mask, bool need_weights, bool average_attn_weights, ::std::optional mask_type, Tensor & out0, Tensor & out1); // {"schema": "aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _triton_scaled_dot_attention_out(const Tensor & q, const Tensor & k, const Tensor & v, double dropout_p, Tensor & out); // {"schema": "aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _triton_multi_head_attention_out(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const ::std::optional & mask, Tensor & out); // {"schema": "aten::_triton_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _foobar_out(const Tensor & self, bool arg1, bool arg2, bool arg3, Tensor & out); // {"schema": "aten::_foobar.out(Tensor self, bool arg1=True, bool arg2=True, *, bool arg3=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void _fused_adam_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adam.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adam(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adam(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_adam_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adam.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adam(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adam.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_adamw_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adamw.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adamw(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adamw(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_adamw_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adamw.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adamw(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adamw.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_sgd_out(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_sgd.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_sgd(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)", "dispatch": "True", "default": "True"} +void _fused_sgd_out(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, const Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_sgd.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, const Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_sgd.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)", "dispatch": "True", "default": "True"} +void _fused_adagrad_out(TensorList self, TensorList grads, TensorList state_sums, TensorList state_steps, double lr, double lr_decay, double weight_decay, double eps, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adagrad.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector> _fused_adagrad(TensorList self, TensorList grads, TensorList state_sums, TensorList state_steps, double lr, double lr_decay, double weight_decay, double eps, bool maximize, const ::std::optional & grad_scale, const ::std::optional & found_inf); // {"schema": "aten::_fused_adagrad(Tensor[] self, Tensor[] grads, Tensor[] state_sums, Tensor[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] state_sums_out, Tensor[] state_steps_out)", "dispatch": "True", "default": "True"} diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/SavedTensorHooks.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/SavedTensorHooks.h new file mode 100644 index 0000000000000000000000000000000000000000..2803bdc646689fcbb4e9f439fe7419d74a6c45a4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/SavedTensorHooks.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace at { + +namespace impl { + +struct TORCH_API SavedTensorDefaultHooksTLS { + // PyObject is defined in c10/util/python_stub.h + std::stack> stack; + + // See NOTE: [Disabling SavedTensorDefaultHooks] for context + // NOTE: [disabled_error_message invariant] + // disabled_error_message is nullopt IFF Saved Tensor hooks is enabled + // We did this for efficiency (so we didn't have to keep a separate bool + // around) + std::optional disabled_error_message; + + // See NOTE: [Deferring tensor pack/unpack hooks until runtime] + bool is_tracing = false; +}; + +} // namespace impl + +struct TORCH_API SavedTensorDefaultHooks { + static void push_hooks( + c10::SafePyObject pack_hook, + c10::SafePyObject unpack_hook); + static std::pair pop_hooks(); + static std::optional> + get_hooks(); + static void lazy_initialize(); + + static const impl::SavedTensorDefaultHooksTLS& get_tls_state(); + static void set_tls_state(const impl::SavedTensorDefaultHooksTLS& tls); + + // NOTE: [Disabling SavedTensorDefaultHooks] + // A developer of a PyTorch feature may choose to disable SavedTensorDefault + // hooks, especially if their feature does not work with it. If they are + // disabled, then the following will raise an error: + // - Attempting to push_hooks + // - calling disable(message) with a non-zero stack (hooks) size + static void disable(const std::string& error_message); + static void enable(); + static bool is_enabled(); + static const std::optional& get_disabled_error_message(); + + // NOTE: [Deferring tensor pack/unpack hooks until runtime] + // To preserve eager semantics of pack/unpack hooks firing only once per saved + // variable, Dynamo/AOTAutograd need to defer hook firing until runtime. Using + // disable() would loud error at trace time, and pushing a no-op hook would + // fail when the traced code is wrapped in a disable_saved_tensors_hooks ctx. + // To do so, we disable these hooks during tracing. See + // https://github.com/pytorch/pytorch/issues/113263. 
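+  //
+  // Illustrative sketch (added comment, not upstream PyTorch text): the
+  // expected call pattern is assumed to be that a tracer flips the TLS flag
+  // for the traced region and then restores whatever value was set before;
+  // the returned bool is assumed to be the previous value of the flag.
+  //
+  //   const bool prev = at::SavedTensorDefaultHooks::set_tracing(true);
+  //   // ... trace the code whose pack/unpack hooks should be deferred ...
+  //   at::SavedTensorDefaultHooks::set_tracing(prev);
+  //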
+ static bool set_tracing(bool is_tracing); +}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Scalar.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Scalar.h new file mode 100644 index 0000000000000000000000000000000000000000..e12557428f15674e4382983c07de64c3e43e8af0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Scalar.h @@ -0,0 +1,3 @@ +#pragma once + +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h new file mode 100644 index 0000000000000000000000000000000000000000..ed591955dd876bd147218e706eb16ec17c34dd90 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#endif + +namespace at::detail { +// When filling a number to 1-element CPU tensor, we want to skip +// everything but manipulate data ptr directly. +// Ideally this fast pass should be implemented in TensorIterator, +// but we also want to skip compute_types which in not avoidable +// in TensorIterator for now. +Tensor& scalar_fill(Tensor& self, const Scalar& value); +TORCH_API Tensor scalar_tensor_static( + const Scalar& s, + std::optional dtype_opt, + std::optional device_opt); +} // namespace at::detail + +// This is in the c10 namespace because we use ADL to find the functions in it. +namespace c10 { + +// FIXME: this should be (and was) Scalar::toTensor, but there is currently no +// way to implement this without going through Derived Types (which are not part +// of core). +inline at::Tensor scalar_to_tensor( + const Scalar& s, + const Device device = at::kCPU) { + // This is the fast track we have for CPU scalar tensors. + if (device == at::kCPU) { + return at::detail::scalar_tensor_static(s, s.type(), at::kCPU); + } + return at::scalar_tensor(s, at::device(device).dtype(s.type())); +} + +} // namespace c10 + +namespace at::native { + +inline Tensor wrapped_scalar_tensor( + const Scalar& scalar, + const Device device = at::kCPU) { + auto tensor = scalar_to_tensor(scalar, device); + tensor.unsafeGetTensorImpl()->set_wrapped_number(true); + return tensor; +} + +} // namespace at::native diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarType.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarType.h new file mode 100644 index 0000000000000000000000000000000000000000..2181250740e23808f06e63660f50ca887169bcb1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarType.h @@ -0,0 +1,4 @@ +#pragma once +#include // for BC reasons +#include +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorImpl.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..94ac1e1c39344876e565ed606fe32afba5f7b884 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorImpl.h @@ -0,0 +1,206 @@ +#pragma once + +#include +#include +#include +#include +namespace at { + +// Struct implementing a sparse CSR tensor. It uses three 1-D tensors for +// denoting the data: `crow_indices_`, `col_indices_` and `values_`. +// The `crow_indices_` tensor is a integer tensor of shape `(size(0) + 1)` +// that represents the compressed row indices of the CSR tensor. 
The +// `col_indices_` tensor is an integer tensor of shape `(nnz())` +// that explicitly stores the column indices of each value of the sparse +// tensor. The `values_` tensor can be of any pytorch-supported data type +// and has shape `(nnz())`. +// +// Since the main advantage of the CSR format over the COO format is speed of +// computation, care must be taken to facilitate smooth interfacing of +// these data structures with optimized libraries such as MKL and MAGMA. +// Since the MKL interface for pytorch currently uses indexing with int32 +// type, it is important to make sure that the `crow_indices` and `col_indices` +// are of type int32 when calling MKL routines such as SPMM or SPMV. +// +// If not calling MKL, it should be alright to use 64 bit integer tensors +// for indexing. +struct TORCH_API SparseCsrTensorImpl : public TensorImpl { + Tensor crow_indices_; + Tensor col_indices_; + Tensor values_; + Layout layout_; + + public: + explicit SparseCsrTensorImpl( + at::DispatchKeySet, + at::Device device, + Layout layout, + const caffe2::TypeMeta); + + void resize_(int64_t nnz, IntArrayRef size); + void resize_and_clear_( + int64_t sparse_dim, + int64_t dense_dim, + IntArrayRef size); + void resize_as_sparse_compressed_tensor_(const Tensor& src); + void set_member_tensors( + const Tensor& crow_indices, + const Tensor& col_indices, + const Tensor& values, + c10::SymIntArrayRef size); + void set_member_tensors( + const Tensor& crow_indices, + const Tensor& col_indices, + const Tensor& values, + IntArrayRef size); + const Tensor& compressed_indices() const { + return crow_indices_; + } + const Tensor& plain_indices() const { + return col_indices_; + } + const Tensor& values() const { + return values_; + } + int64_t nnz() { + return col_indices_.size(-1); + } + + inline int64_t batch_dim() const noexcept { + return crow_indices_.dim() - 1; + } + + inline int64_t sparse_dim() const noexcept { + return 2; + } + + inline int64_t dense_dim() const noexcept { + return values_.dim() - batch_dim() - block_dim() - 1; + } + + private: + inline int64_t block_dim() const noexcept { + return (layout_ == kSparseBsr || layout_ == kSparseBsc ? 
2 : 0); + } + + protected: + IntArrayRef strides_custom() const override; + SymIntArrayRef sym_strides_custom() const override; + bool is_contiguous_custom(MemoryFormat) const override; + + public: + void set_size(int64_t dim, int64_t new_size) override; + void set_stride(int64_t dim, int64_t new_stride) override; + void set_storage_offset(int64_t storage_offset) override; + Layout layout_impl() const override { + return layout_; + } + void set_layout(Layout layout) { + switch (layout) { + case kSparseCsr: + case kSparseCsc: + case kSparseBsr: + case kSparseBsc: + layout_ = layout; + break; + default: + TORCH_CHECK(false, "unsupported layout ", layout); + } + } + + template + c10::intrusive_ptr shallow_copy_and_detach_core( + VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const { + const auto mode_stack_len = c10::impl::TorchDispatchModeTLS::stack_len(); + c10::impl::PyInterpreter&& interpreter = nullptr; + if (mode_stack_len > 0 && + !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) { + const auto& cur_torch_dispatch_mode_state = + c10::impl::TorchDispatchModeTLS::get_stack_at(mode_stack_len - 1); + interpreter = cur_torch_dispatch_mode_state->pyinterpreter(); + } else if ( + key_set_.has(DispatchKey::Python) && + !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) { + interpreter = pyobj_slot_.load_pyobj_interpreter(); + } else { + // otherwise just copy the SparseTensorImpl and not the PyObject. + auto impl = c10::make_intrusive( + key_set(), device(), layout_impl(), dtype()); + copy_tensor_metadata( + /*src_sparse_impl=*/this, + /*dest_sparse_impl=*/impl.get(), + /*version_counter=*/version_counter, + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + return impl; + } + auto r = interpreter->detach(this); + r->set_version_counter(std::forward(version_counter)); + r->set_allow_tensor_metadata_change(allow_tensor_metadata_change); + return r; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override { + return shallow_copy_and_detach_core( + version_counter, allow_tensor_metadata_change); + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override { + return shallow_copy_and_detach_core( + std::move(version_counter), allow_tensor_metadata_change); + } + + private: + explicit SparseCsrTensorImpl( + at::DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + at::Tensor crow_indices, + at::Tensor col_indices, + at::Tensor values, + at::Layout layout); + + const char* tensorimpl_type_name() const override; + + /** + * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / + * storage_offset) from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE + * [ TensorImpl Shallow-Copying ]. 
+ */ + static void copy_tensor_metadata( + const SparseCsrTensorImpl* src_sparse_impl, + SparseCsrTensorImpl* dest_sparse_impl, + c10::VariableVersion version_counter, + bool allow_tensor_metadata_change) { + TensorImpl::copy_tensor_metadata( + src_sparse_impl, + dest_sparse_impl, + std::move(version_counter), + allow_tensor_metadata_change); + + // Sparse-specific fields + dest_sparse_impl->crow_indices_ = src_sparse_impl->compressed_indices(); + dest_sparse_impl->col_indices_ = src_sparse_impl->plain_indices(); + dest_sparse_impl->values_ = src_sparse_impl->values(); + dest_sparse_impl->layout_ = src_sparse_impl->layout_impl(); + } +}; +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorUtils.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..f4095c9bfa04480967e76049b7ac1bb06ac22967 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/SparseCsrTensorUtils.h @@ -0,0 +1,441 @@ +#pragma once + +#include +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#include +#include +#else +#include +#include +#endif + +#define AT_DISPATCH_ALL_SPARSE_COMPRESSED_LAYOUTS(LAYOUT, NAME, ...) \ + [&] { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsr: \ + case kSparseCsc: \ + case kSparseBsr: \ + case kSparseBsc: \ + return __VA_ARGS__(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse compressed tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( \ + LAYOUT, NAME, ROW_DIM_ACTION, COLUMN_DIM_ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsr: \ + case kSparseBsr: \ + return (ROW_DIM_ACTION)(); \ + case kSparseCsc: \ + case kSparseBsc: \ + return (COLUMN_DIM_ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse compressed tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_PLAIN_SPARSE_COMPRESSED_LAYOUTS( \ + LAYOUT, NAME, NO_BLOCK_ACTION, BLOCK_ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsr: \ + case kSparseCsc: \ + return (NO_BLOCK_ACTION)(); \ + case kSparseBsr: \ + case kSparseBsc: \ + return (BLOCK_ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse compressed tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_SPARSE_ROW_COMPRESSED_LAYOUTS( \ + LAYOUT, NAME, ROW_DIM_ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsr: \ + case kSparseBsr: \ + return (ROW_DIM_ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse row compressed tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_SPARSE_COL_COMPRESSED_LAYOUTS( \ + LAYOUT, NAME, COL_DIM_ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsc: \ + case kSparseBsc: \ + return (COL_DIM_ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse column compressed tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_SPARSE_COMPRESSED_NONBLOCK_LAYOUTS(LAYOUT, NAME, ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseCsr: \ + case kSparseCsc: \ + return (ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse compressed (non-block) tensor layout but got 
", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_SPARSE_COMPRESSED_BLOCK_LAYOUTS(LAYOUT, NAME, ACTION) \ + [&]() { \ + const auto& the_layout = LAYOUT; \ + switch (the_layout) { \ + case kSparseBsr: \ + case kSparseBsc: \ + return (ACTION)(); \ + default: \ + AT_ERROR( \ + NAME, \ + " expected sparse compressed block tensor layout but got ", \ + the_layout); \ + } \ + }() + +#define AT_DISPATCH_SPARSE_VALUE_TYPES(TYPE, NAME, ...) \ + AT_DISPATCH_SWITCH( \ + TYPE, \ + NAME, \ + AT_DISPATCH_CASE_ALL_TYPES_AND_COMPLEX_AND4( \ + kComplexHalf, kHalf, kBool, kBFloat16, __VA_ARGS__)) + +namespace at::sparse_csr { + +// Implements RAII object to manage checking sparse tensor invariants: +class CheckSparseTensorInvariants { + bool old_state; + + public: + CheckSparseTensorInvariants(bool state) { + old_state = at::globalContext().checkSparseTensorInvariants(); + at::globalContext().setCheckSparseTensorInvariants(state); + } + + ~CheckSparseTensorInvariants() { + at::globalContext().setCheckSparseTensorInvariants(old_state); + } +}; + +using SparseCsrTensor = Tensor; + +inline bool is_sparse_compressed(const Layout& layout) { + switch (layout) { + case kSparseCsr: + case kSparseCsc: + case kSparseBsr: + case kSparseBsc: + return true; + default:; + } + return false; +} + +inline bool is_sparse_compressed(const Tensor& self) { + return is_sparse_compressed(self.layout()); +} + +inline SparseCsrTensorImpl* get_sparse_csr_impl(const SparseCsrTensor& self) { + AT_DISPATCH_ALL_SPARSE_COMPRESSED_LAYOUTS( + self.layout(), "get_sparse_csr_impl", [&] {}); + return static_cast(self.unsafeGetTensorImpl()); +} + +inline std::string layoutToString( + Layout layout, + bool upper = false, + bool lower = false) { + switch (layout) { + case kSparseCsr: + return (upper ? "CSR" : (lower ? "csr" : "Csr")); + case kSparseCsc: + return (upper ? "CSC" : (lower ? "csc" : "Csc")); + case kSparseBsr: + return (upper ? "BSR" : (lower ? "bsr" : "Bsr")); + case kSparseBsc: + return (upper ? "BSC" : (lower ? 
"bsc" : "Bsc")); + default: + TORCH_CHECK(false, "Not a sparse compressed layout:", layout); + return ""; + } +} + +inline bool isCompressedRow(Layout layout) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + layout, "isCompressedRow", [&] { return true; }, [&] { return false; }); +} + +inline bool isCompressedColumn(Layout layout) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + layout, + "isCompressedColumn", + [&] { return false; }, + [&] { return true; }); +} + +inline std::string compressedIndicesName(Layout layout) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + layout, + "compressedIndicesName", + [&] { return "crow_indices"; }, + [&] { return "ccol_indices"; }); +} + +inline std::string plainIndicesName(Layout layout) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + layout, + "plainIndicesName", + [&] { return "col_indices"; }, + [&] { return "row_indices"; }); +} + +inline std::string compressedDimName(Layout layout) { + switch (layout) { + case kSparseCsr: + return "row"; + case kSparseCsc: + return "column"; + case kSparseBsr: + return "row block"; + case kSparseBsc: + return "column block"; + default: + TORCH_CHECK(false, "Not a sparse compressed layout:", layout); + return ""; + } +} + +inline std::string plainDimName(Layout layout) { + switch (layout) { + case kSparseCsr: + return "column"; + case kSparseCsc: + return "row"; + case kSparseBsr: + return "column block"; + case kSparseBsc: + return "row block"; + default: + TORCH_CHECK(false, "Not a sparse compressed layout:", layout); + return ""; + } +} + +inline size_t rowDimension(Layout layout, IntArrayRef size) { + return size.size() - (isCompressedRow(layout) ? 2 : 1); +} + +inline size_t columnDimension(Layout layout, IntArrayRef size) { + return size.size() - (isCompressedColumn(layout) ? 2 : 1); +} + +inline size_t compressedDimension( + Layout layout, + IntArrayRef size, + size_t dense_ndim = 0) { + return size.size() - dense_ndim - (isCompressedRow(layout) ? 2 : 1); +} + +inline size_t plainDimension( + Layout layout, + IntArrayRef size, + size_t dense_ndim = 0) { + return size.size() - dense_ndim - (isCompressedRow(layout) ? 
1 : 2); +} + +inline int64_t numBatchDimensions(Tensor const& self) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + self.layout(), + "numBatchDimensions", + [&self] { return self.crow_indices().dim() - 1; }, + [&self] { return self.ccol_indices().dim() - 1; }); +} + +inline std::pair getCompressedPlainIndices(Tensor const& self) { + return AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS( + self.layout(), + "getCompressedPlainIndices", + [&self] { + return std::make_pair(self.crow_indices(), self.col_indices()); + }, + [&self] { + return std::make_pair(self.ccol_indices(), self.row_indices()); + }); +} + +inline ScalarType getIndexDtype(Tensor const& self) { + switch (self.layout()) { + case kSparseCsr: + case kSparseBsr: + return self.crow_indices().scalar_type(); + case kSparseCsc: + case kSparseBsc: + return self.ccol_indices().scalar_type(); + case kSparse: + return self._indices().scalar_type(); + default: + return ScalarType::Long; + } +} + +inline Layout flip_compressed_layout(Layout layout) { + switch (layout) { + case kSparseCsr: + return kSparseCsc; + case kSparseCsc: + return kSparseCsr; + case kSparseBsr: + return kSparseBsc; + case kSparseBsc: + return kSparseBsr; + default: + TORCH_CHECK(false, "Not a sparse compressed layout:", layout); + return kSparseCsr; + } +} + +inline DimVector getBlockSize(Tensor const& self) { + int64_t n_batch = numBatchDimensions(self); + return at::DimVector(self.values().sizes().slice(n_batch + 1, 2)); +} + +inline at::OptionalArray getSymIntBlockSize(Tensor const& self) { + if (self.layout() == at::kSparseBsr || self.layout() == at::kSparseBsc) { + int64_t n_batch = numBatchDimensions(self); + return self.values().sym_sizes().slice(n_batch + 1, 2).vec(); + } else { + return {}; + } +} + +template +inline bool only_sparse_compressed_binary_op_trivial_cases( + const Tensor& self, + const Tensor& other, + const Scalar& alpha, + Tensor& out, + const binary_op_t& binary_op, + const binary_op_out_t& binary_op_out) { + // Only sparse compressed! 
Just like the name says :) + TORCH_INTERNAL_ASSERT(at::sparse_csr::is_sparse_compressed(self)); + TORCH_INTERNAL_ASSERT(at::sparse_csr::is_sparse_compressed(other)); + TORCH_INTERNAL_ASSERT(at::sparse_csr::is_sparse_compressed(out)); + + // Bypass BLAS if there are matches in (self, other, out) + if (self.is_same(out) && self.is_same(other)) { + binary_op_out(self.values(), other.values(), alpha); + return true; + } + if (self.is_same(other)) { + auto [compressed_indices, plain_indices] = + at::sparse_csr::getCompressedPlainIndices(self); + static_cast(out.unsafeGetTensorImpl()) + ->set_member_tensors( + compressed_indices, + plain_indices, + binary_op(self.values(), other.values(), alpha), + self.sizes()); + return true; + } + return false; +} + +inline bool only_sparse_compressed_add_trivial_cases( + const Tensor& self, + const Tensor& other, + const Scalar& alpha, + Tensor& out) { + return only_sparse_compressed_binary_op_trivial_cases( + self, + other, + alpha, + out, + [](const Tensor& v1, const Tensor& v2, const Scalar& alpha) { + return v1.add(v2, alpha); + }, + [](const Tensor& v1, const Tensor& v2, const Scalar& alpha) { + return v1.add_(v2, alpha); + }); +} + +inline Tensor to_type(const Tensor& input, ScalarType dtype) { + auto [compressed_indices, plain_indices] = + at::sparse_csr::getCompressedPlainIndices(input); + return at::_sparse_compressed_tensor_unsafe( + compressed_indices, + plain_indices, + std::move(input.values()).to(dtype), + input.sizes(), + dtype, + input.layout(), + input.device(), + input.options().pinned_memory_opt()); +} + +template +inline std::tuple create_acc_buffer( + TensorOptions option, + ScalarType type, + int64_t nnz = -1) { + Tensor new_values, new_values_acc; + constexpr bool need_acc = !std::is_same_v; + bool is_integral = at::isIntegralType(type, /*includeBool=*/true); + if constexpr (need_acc) { + auto acc_dtype = CppTypeToScalarType::value; + new_values_acc = at::empty({}, option.dtype(acc_dtype)); + new_values = is_integral ? 
new_values_acc : at::empty({}, option); + } else { + new_values = new_values_acc = at::empty({}, option); + } + if (nnz != -1) { + return std::make_tuple( + new_values.resize_(nnz), new_values_acc.resize_(nnz)); + } else { + return std::make_tuple(new_values, new_values_acc); + } +} + +inline void copy_from_acc_buffer(Tensor& new_values, Tensor& new_values_acc) { + if (!new_values_acc.is_same(new_values)) { + new_values.copy_(new_values_acc); + } +} + +} // namespace at::sparse_csr diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h new file mode 100644 index 0000000000000000000000000000000000000000..5d6285281f23ec9adf7d916d40d743283980e053 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h @@ -0,0 +1,2 @@ +#pragma once +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/Tensor.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/Tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..0b3719cca3bf1ff7154625c510c8292dd47444a7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/Tensor.h @@ -0,0 +1,3 @@ +#pragma once + +#include diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorGeometry.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorGeometry.h new file mode 100644 index 0000000000000000000000000000000000000000..41f14a15ba99c2bb2eb81aeaadbd0b08ac086c4d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorGeometry.h @@ -0,0 +1,144 @@ +#pragma once + +#include +#include + +namespace at { + +// Return if the tensor geometry represented by `sizes` and `strides` is +// contiguous. Although we cache is_contiguous in tensor now, this is still useful +// because it allows checking if a particular geometry is contiguous without +// explicitly constructing a tensor, e.g., when you want to choose a kernel +// strategy based on whether a subgeometry is contiguous.
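+//
+// Worked example (added as an illustrative comment, not upstream PyTorch
+// text): modulo the usual special cases for zero- and one-sized dimensions,
+// a geometry is contiguous when each stride equals the product of the sizes
+// to its right, e.g. sizes (2, 3, 4) pair with contiguous strides (12, 4, 1):
+//
+//   geometry_is_contiguous({2, 3, 4}, {12, 4, 1});  // expected: true
+//   geometry_is_contiguous({2, 3, 4}, {1, 2, 6});   // expected: false (column-major)
+//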
+TORCH_API bool geometry_is_contiguous(IntArrayRef sizes, IntArrayRef strides); + +struct TORCH_API TensorGeometry { + TensorGeometry() = default; + + explicit TensorGeometry(c10::SymIntArrayRef sizes) + : sizes_(sizes.vec()), + strides_(sizes.size()), + has_symbolic_sizes_strides_( + !c10::asIntArrayRefSlowOpt(sizes).has_value()) { + int64_t dim = static_cast(sizes.size()); + c10::SymInt expected_stride = 1; + for (int64_t i = dim - 1; i >= 0; i--) { + strides_[i] = expected_stride; + expected_stride *= sizes_[i]; + } + numel_ = expected_stride; + } + + explicit TensorGeometry(const TensorBase& t) + : sizes_(t.sym_sizes().vec()), + strides_(t.sym_strides().vec()), + storage_offset_(t.sym_storage_offset()), + numel_(t.sym_numel()), + has_symbolic_sizes_strides_( + t.unsafeGetTensorImpl()->has_symbolic_sizes_strides()) {} + + // true if the tensor is contiguous + bool is_contiguous() const; + + int64_t dim() const { + return static_cast(sizes_.size()); + } + + int64_t size(int64_t dim) const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + dim = c10::maybe_wrap_dim(dim, this->dim()); + return sizes_.at(static_cast(dim)).as_int_unchecked(); + } + c10::IntArrayRef sizes() const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + return c10::asIntArrayRefUnchecked(sizes_); + } + int64_t stride(int64_t dim) const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + dim = c10::maybe_wrap_dim(dim, this->dim()); + return strides_.at(static_cast(dim)).as_int_unchecked(); + } + c10::IntArrayRef strides() const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + return c10::asIntArrayRefUnchecked(strides_); + } + int64_t storage_offset() const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + return storage_offset_.as_int_unchecked(); + } + int64_t numel() const { + TORCH_INTERNAL_ASSERT(!has_symbolic_sizes_strides_); + return numel_.as_int_unchecked(); + } + + c10::SymInt sym_size(int64_t dim) const { + dim = c10::maybe_wrap_dim(dim, this->dim()); + return sizes_.at(static_cast(dim)); + } + c10::SymIntArrayRef sym_sizes() const { + return sizes_; + } + c10::SymInt sym_stride(int64_t dim) const { + dim = c10::maybe_wrap_dim(dim, this->dim()); + return strides_.at(static_cast(dim)); + } + c10::SymIntArrayRef sym_strides() const { + return strides_; + } + c10::SymInt sym_storage_offset() const { + return storage_offset_; + } + c10::SymInt sym_numel() const { + return numel_; + } + + TensorGeometry transpose(int64_t dim0, int64_t dim1) { + TensorGeometry r = *this; // copy + TORCH_CHECK( + dim0 < dim(), + "transpose: dim0=", + dim0, + " out of range (dim=", + dim(), + ")") + TORCH_CHECK( + dim1 < dim(), + "transpose: dim1=", + dim1, + " out of range (dim=", + dim(), + ")") + std::swap(r.sizes_[dim0], r.sizes_[dim1]); + std::swap(r.strides_[dim0], r.strides_[dim1]); + return r; + } + + std::vector& mutable_sizes() { + return sizes_; + } + std::vector& mutable_strides() { + return strides_; + } + c10::SymInt& mutable_storage_offset() { + return storage_offset_; + } + void recompute() { + // recalculate numel after a change + c10::SymInt numel = 1; + for (const auto& i : sizes_) { + numel = numel * i; + } + numel_ = std::move(numel); + has_symbolic_sizes_strides_ = + !c10::asIntArrayRefSlowOpt(sizes_).has_value(); + } + + private: + std::vector sizes_; + std::vector strides_; + c10::SymInt storage_offset_; + c10::SymInt numel_; + bool has_symbolic_sizes_strides_{false}; +}; + +} // namespace at diff --git 
a/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorIterator.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorIterator.h new file mode 100644 index 0000000000000000000000000000000000000000..e9e9e0c8e8bfe6eab46a9a071c753ba3de454c0c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorIterator.h @@ -0,0 +1,1028 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace at { +class Tensor; +class OptionalTensorRef; +using NameVector = SmallVector; +} // namespace at + +// TensorIterator is a helper class for element-wise operations, such as +// arithmetic, comparisons, and trigonometric functions. It handles +// broadcasting and type conversions of operands. +// +// This is inspired by NumPy's Array Iterator API (NpyIter). +// +// The files Loops.h and Loops.cuh provide functions to build kernels that +// use TensorIterator. +// +// Example: +// +// auto iter = TensorIteratorConfig() +// .add_output(output) +// .add_input(input) +// .build() +// +// [MyKernel.cpp / MyKernel.cu] +// cpu_kernel(iter, [](float a, float b) { +// return a + b; +// }); +// +// gpu_kernel(iter, []GPU_LAMBDA(float a, float b) -> float { +// return a + b; +// }); +// +// Note [Order of Construction] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// When setting up the tensor iterator configuration, the output Tensors +// have to be added first via +// TensorIteratorConfig::add_owned_output(at::Tensor). After adding all outputs, +// the inputs can be added via +// TensorIteratorConfig::add_owned_input(at::Tensor). +// Adding another output after inputs have been added will rise an exception. +// +// Note [Common Dtype Computation] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Some operations have a natural notion of a "common dtype" or +// "computation dtype" where all inputs are cast to one dtype, the +// operation is performed, and then the results are cast to all outputs. +// +// TensorIterator infers a common dtype if all inputs have the same dtype, +// and it computes one using type promotion rules on its inputs if +// promote_inputs_to_common_dtype_ is true. Attempting to query +// a common dtype otherwise will throw an exception. +// +// Note that the outputs are not considered when computing a common dtype. + +namespace at { + +namespace internal { +// This parameter is heuristically chosen to determine the minimum number of +// work that warrants parallelism. For example, when summing an array, it is +// deemed inefficient to parallelise over arrays shorter than 32768. Further, +// no parallel algorithm (such as parallel_reduce) should split work into +// smaller than GRAIN_SIZE chunks. 
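// ---------------------------------------------------------------------------
// Editor's annotation (illustrative sketch, not part of the vendored header).
// GRAIN_SIZE above is the minimum amount of per-thread work that ATen's
// parallel primitives consider worth splitting. A hedged sketch of a caller
// passing it to at::parallel_for (ATen/Parallel.h); `scale_in_parallel` is a
// hypothetical helper:
#include <ATen/Parallel.h>
#include <ATen/TensorIterator.h> // for at::internal::GRAIN_SIZE
#include <vector>

void scale_in_parallel(std::vector<float>& v, float s) {
  at::parallel_for(
      0, static_cast<int64_t>(v.size()), at::internal::GRAIN_SIZE,
      [&](int64_t begin, int64_t end) {
        // Each invocation handles one chunk; chunks smaller than
        // GRAIN_SIZE are not split any further.
        for (int64_t i = begin; i < end; ++i) {
          v[i] *= s;
        }
      });
}
// ---------------------------------------------------------------------------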
+constexpr int64_t GRAIN_SIZE = 32768; + +// Storage for a non-owning Tensor, without needing to include Tensor.h +class TORCH_API OpaqueOptionalTensorRef { + alignas(alignof(TensorBase)) std::array data_{}; + + public: + OpaqueOptionalTensorRef(); + OpaqueOptionalTensorRef(const OpaqueOptionalTensorRef&) = default; + OpaqueOptionalTensorRef& operator=(const OpaqueOptionalTensorRef&) = default; + OpaqueOptionalTensorRef(OpaqueOptionalTensorRef&&) noexcept = default; + OpaqueOptionalTensorRef& operator=(OpaqueOptionalTensorRef&&) noexcept = + default; + ~OpaqueOptionalTensorRef(); + + OptionalTensorRef* get() { + return reinterpret_cast(data_.data()); + } + const OptionalTensorRef* get() const { + return reinterpret_cast(data_.data()); + } + + OptionalTensorRef& operator*() { + return *get(); + } + const OptionalTensorRef& operator*() const { + return *get(); + } + OptionalTensorRef* operator->() { + return get(); + } + const OptionalTensorRef* operator->() const { + return get(); + } + + const Tensor& getTensor() const; +}; +} // namespace internal + +struct TORCH_API OperandInfo { + using StrideVector = SmallVector; + OperandInfo() = default; + C10_ALWAYS_INLINE explicit OperandInfo(c10::MaybeOwned&& t) { + if (t->defined()) { + device = t->device(); + target_dtype = t->scalar_type(); + current_dtype = target_dtype; + } + tensor(std::move(t)); + validate(); + } + + C10_ALWAYS_INLINE OperandInfo(const OperandInfo&) = default; + C10_ALWAYS_INLINE OperandInfo& operator=(const OperandInfo&) = default; + C10_ALWAYS_INLINE OperandInfo(OperandInfo&&) noexcept = default; + C10_ALWAYS_INLINE OperandInfo& operator=(OperandInfo&&) noexcept = default; + C10_ALWAYS_INLINE ~OperandInfo() = default; + + /// The data pointer. This may be different from tensor->data_ptr() if the + /// iterator is split. + void* data = nullptr; + + /// Stride after broadcasting. The stride is in bytes, not number of elements. + StrideVector stride_bytes; + + /// The desired device and type for the operand. For inputs, this specifies + /// that the input should be converted to this type if necessary. For outputs, + /// this specifies which type to allocate. target_dtype and device are + /// initialized with the dtype and device of the tensor but during type + /// promotion target_dtype value can become different from tensor's dtype + /// also, during type promotion target_dtype and device can be set for an + /// undefined tensor so that tensor can be properly constructed later. + std::optional device = std::nullopt; + ScalarType target_dtype = ScalarType::Undefined; + // Caches dtype of the tensor, because scalar_type is an expensive operation + // If dtype of the tensor is changed (e.g. as a result of type promotion or in + // allocate_outputs), this + // value should be changed too. + ScalarType current_dtype = ScalarType::Undefined; + + bool is_device_defined() const { + return device.has_value(); + } + bool is_type_defined() const { + return target_dtype != ScalarType::Undefined; + } + TensorOptions options() const { + return TensorOptions(target_dtype).device(device); + } + + bool is_output = false; + + // will_resize is only for output tensor. 
+ // 1) Functional call(like torch.add(self, other)): output tensor is + // undefined, and pytorch creates a new tensor by using common shape + // and computed stride in TensorIterator; + // 2) Inplace call(like torch.add_(self, other)): output tensor is same + // with input tensor, and can't to modify tensor's size and stride; + // 3) Op call with output(like torch.add(self, other, out = output)): + // output tensor is defined, but tensor shape maybe different with common + // shape. If tensor shape is not same with common shape, this output + // tensor will be resized by using common shape and computed stride in + // TensorIterator. Otherwise can't modify tensor's size and stride. + bool will_resize = false; + + bool is_read_write = false; + + bool is_const = false; + + void validate() { + TORCH_CHECK( + !tensor_base_->defined() || tensor_base_->layout() == kStrided, + "unsupported tensor layout: ", + tensor_base_->layout()); + } + + /// The tensor operand. Note that the strides, data pointer, and + /// other attributes may differ due to dimension reordering and + /// coalescing. + const Tensor& tensor() const { + return tensor_storage_.getTensor(); + } + const TensorBase& tensor_base() const { + return *tensor_base_; + } + void tensor(c10::MaybeOwned&& tensor); + + // Save the original tensor operand in cases when an output is modified + // (e.g. if dtype is changed) + const Tensor& original_tensor() const { + return original_tensor_storage_.getTensor(); + } + const TensorBase& original_tensor_base() const { + return *original_tensor_base_; + } + + // Set tensor to a new value, and store the old tensor value in + // original_tensor Should only ever be called once for the lifetime of an + // operand + void exchange_tensor(c10::MaybeOwned&& new_tensor); + + // Move original_tensor back into tensor, exchange_tensor must have been + // called before + void restore_original_tensor(); + + private: + c10::MaybeOwned tensor_base_; + c10::MaybeOwned original_tensor_base_ = + c10::MaybeOwned::owned(std::in_place); + + // We store TensorBase visibly in the header to allow inline access. + // However, we sometimes need a genuine `const Tensor &` for the + // TensorIterator API. So, we also store a non-owning `Tensor` + // object in these `_storage_` variables. + internal::OpaqueOptionalTensorRef tensor_storage_; + internal::OpaqueOptionalTensorRef original_tensor_storage_; +}; + +struct SplitUntil32Bit; + +enum class FastSetupType : uint8_t { + NONE, + CONTIGUOUS, + CHANNELS_LAST, + NON_OVERLAPPING_DENSE +}; + +class TensorIteratorConfig; +struct TensorIterator; + +struct TORCH_API TensorIteratorBase : public impl::MetaBase { + using DimMask = std::bitset<64>; + using PtrVector = SmallVector; + using StrideVector = SmallVector; + + TensorIteratorBase(); + void build(TensorIteratorConfig&); + + // The inner-loop function operates on the fastest moving dimension. It + // implements element-wise operations in terms of 1-d strided tensors. + // + // Arguments: + // data: data pointers for each operand (length `ntensors`) + // strides: stride for each operand (length `ntensors`) + // size: size of inner loop + // + // The `size` often matches shape[0], but may be smaller due to + // parallelization of the inner loop. 
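// ---------------------------------------------------------------------------
// Editor's annotation (illustrative sketch, not part of the vendored header).
// The inner-loop signature described above receives raw byte pointers plus
// per-operand byte strides. A hedged sketch of a CPU "double the input"
// kernel; it assumes both tensors are defined, float, and on CPU, and
// `double_into` is a hypothetical helper:
#include <ATen/ATen.h>
#include <ATen/TensorIterator.h>

void double_into(const at::Tensor& out, const at::Tensor& in) {
  auto iter = at::TensorIteratorConfig()
                  .add_output(out)       // outputs must be added first
                  .add_const_input(in)
                  .build();
  iter.for_each([](char** data, const int64_t* strides, int64_t n) {
    // data[0]/strides[0] belong to the output, data[1]/strides[1] to the
    // input; strides are in bytes, so pointer arithmetic is done on char*.
    for (int64_t i = 0; i < n; ++i) {
      *reinterpret_cast<float*>(data[0] + i * strides[0]) =
          2.0f * *reinterpret_cast<const float*>(data[1] + i * strides[1]);
    }
  });
}
// ---------------------------------------------------------------------------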
+ using loop2d_t = c10::function_ref< + void(char** data, const int64_t* strides, int64_t size0, int64_t size1)>; + + using loop_subiter_t = c10::function_ref; + + void foreach_reduced_elt(loop_subiter_t loop, bool parallelize = true); + + int ndim() const { + return static_cast(shape_.size()); + } + IntArrayRef shape() const { + return shape_; + } + int64_t numel() const; + int ntensors() const { + return static_cast(operands_.size()); + } + int noutputs() const { + return num_outputs_; + } + int ninputs() const { + return ntensors() - noutputs(); + } + IntArrayRef view_offsets() const { + return view_offsets_; + } + + /// number of elements in the output operand. this is the same as numel() for + /// operations that are not reductions. + int64_t num_output_elements() const; + + /// number of reduced dimensions in a reduction operation + int num_reduce_dims() const; + + /// 1-dimensional iteration and no buffering or type conversion + bool is_trivial_1d() const; + /// Reducible to 1-dimensional and all operands are contiguous + bool is_contiguous() const; + bool is_dim_reduced(int dim) const; + + /// Accessors for each operand + IntArrayRef strides(int64_t arg) const { + return operands_[arg].stride_bytes; + } + void* data_ptr(int64_t arg) const; + ScalarType dtype(int64_t arg = 0) const { + return operands_[arg].current_dtype; + } + ScalarType common_dtype() const { + TORCH_INTERNAL_ASSERT( + common_dtype_ != ScalarType::Undefined, + "Queried for invalid common dtype!"); + return common_dtype_; + } + ScalarType input_dtype(int64_t arg = 0) const { + return operands_[num_outputs_ + arg].current_dtype; + } + Device device(int64_t arg = 0) const { + return operands_[arg].device.value(); + } + c10::DeviceType device_type(int64_t arg = 0) const { + return device(arg).type(); + } + int64_t element_size(int64_t arg) const { + return static_cast(elementSize(dtype(arg))); + } + bool is_scalar(int64_t arg) const; + bool is_cpu_scalar(int64_t arg) const; + + const TensorBase& tensor_base(int64_t arg) const { + return operands_[arg].tensor_base(); + } + const Tensor& tensor(int64_t arg) const { + return operands_[arg].tensor(); + } + + const TensorBase& output_base(int64_t arg = 0) const { + AT_ASSERT(arg < num_outputs_); + return tensor_base(arg); + } + + const Tensor& output(int64_t arg = 0) const { + AT_ASSERT(arg < num_outputs_); + return tensor(arg); + } + + const TensorBase& input_base(int64_t arg = 0) const { + AT_ASSERT(arg >= 0 && arg < ntensors() - num_outputs_); + return tensor_base(num_outputs_ + arg); + } + const Tensor& input(int64_t arg = 0) const { + AT_ASSERT(arg >= 0 && arg < ntensors() - num_outputs_); + return tensor(num_outputs_ + arg); + } + + // Copies from temporary outputs back to the original outputs + // NOTE: only used on CPU + void cast_outputs(); + + /// Removes an operand from this iterator + void remove_operand(int64_t arg); + /// Shrinks an iterated dimension + void narrow(int dim, int64_t start, int64_t size); + /// Narrows every dim after and including `start_dim` to size one. + void select_all_keeping_dim(int start_dim, IntArrayRef starts); + /// Replaces the data pointer for the operand at index `arg`. + /// The new pointer should have the same sizes, strides and dtype as the + /// original + void unsafe_replace_operand(int64_t arg, void* data); + + /// Splits this TensorIterator into two iterators. Together they iterate over + /// the entire operation. Used by `with_32bit_indexing()`. 
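// ---------------------------------------------------------------------------
// Editor's annotation (illustrative sketch, not part of the vendored header).
// split() / with_32bit_indexing() above let GPU-style kernels recursively cut
// the iteration space until every piece fits 32-bit index arithmetic. A
// hedged sketch of that recursion; the kernel launcher is passed in as a
// callback rather than being any particular ATen API:
#include <ATen/TensorIterator.h>
#include <functional>

void launch_with_32bit_indexing(
    at::TensorIteratorBase& iter,
    const std::function<void(at::TensorIteratorBase&)>& launch_kernel) {
  if (!iter.can_use_32bit_indexing()) {
    // Recurse into sub-iterators; together they cover the whole iteration.
    for (auto& sub_iter : iter.with_32bit_indexing()) {
      launch_with_32bit_indexing(sub_iter, launch_kernel);
    }
    return;
  }
  launch_kernel(iter); // safe to use 32-bit offsets here
}
// ---------------------------------------------------------------------------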
+ std::unique_ptr split(int dim); + + /// Returns the dimension with the largest extent: (size[dim]-1) * stride[dim] + int get_dim_to_split() const; + + template + T scalar_value(int64_t arg) { + auto& op = operands_[arg]; + return c10::fetch_and_cast(op.tensor_base().scalar_type(), op.data); + } + + /// Return scalar value from original_tensor_base if it is defined. When + /// common_dtype is Half, casting scalar input to common_dtype might overflow. + /// If the scalar is aleady given in the type of Half, then return scalar + /// value from tensor_base. + template + T original_scalar_value(int64_t arg) { + auto& original_tensor_base = operands_[arg].original_tensor_base(); + if (original_tensor_base.defined()) { + TORCH_INTERNAL_ASSERT( + original_tensor_base.scalar_type() != common_dtype()); + return c10::fetch_and_cast( + original_tensor_base.scalar_type(), + original_tensor_base.const_data_ptr()); + } else { + return scalar_value(arg); + } + } + + private: + template + auto loop_2d_from_1d(const loop1d_t& loop) { + return + [loop, ntensor = ntensors()]( + char** base, const int64_t* strides, int64_t size0, int64_t size1) { + PtrVector data(base, base + ntensor); + const int64_t* outer_strides = &strides[ntensor]; + for (const auto i : c10::irange(size1)) { + if (i > 0) { + for (const auto arg : c10::irange(ntensor)) { + data[arg] += outer_strides[arg]; + } + } + loop(data.data(), strides, size0); + } + }; + } + + public: + template < + typename loop1d_t, + std::enable_if_t< + std::is_convertible_v< + loop1d_t, + c10::function_ref< + void(char**, const int64_t* strides, int64_t size)>>, + int> = 0> + void for_each(loop1d_t loop, int64_t grain_size = at::internal::GRAIN_SIZE) { + for_each(loop_2d_from_1d(loop), grain_size); + } + + void for_each(loop2d_t loop, int64_t grain_size = at::internal::GRAIN_SIZE); + + void parallel_reduce(loop2d_t loop); + + template < + typename loop1d_t, + std::enable_if_t< + std::is_convertible_v< + loop1d_t, + c10::function_ref< + void(char**, const int64_t* strides, int64_t size)>>, + int> = 0> + void serial_for_each(loop1d_t loop, Range range) { + serial_for_each(loop_2d_from_1d(loop), range); + } + + void serial_for_each(loop2d_t loop, Range range) const; + + /// Create a strides array for a Tensor with shape of this iterator. The + /// parameter `element_size` specifies the size of Tensor's data type in + /// bytes (e.g. `4` for `float`) + StrideVector compatible_stride(int64_t element_size) const; + + /// Inverts the re-ordering done by reorder_dimensions. This can only be + /// called *before* coalesce_dimensions() is called. + DimVector invert_perm(IntArrayRef input) const; + + /// Reapply same re-ordering as it is done by reorder_dimensions. This can + /// only be called *before* coalesce_dimensions() is called. + DimVector apply_perm_and_mul(IntArrayRef input, int mul) const; + + /// Helper functions for CPU iteration + StrideVector get_dim_strides(int dim) const; + StrideVector get_strides() const; + StrideVector get_inner_strides() const { + return get_dim_strides(0); + } + PtrVector get_base_ptrs() const; + + // Helper functions for advanced stride manipulations (e.g. torch.flip) + void _unsafe_set_arg_strides(const int64_t arg, IntArrayRef strides) { + operands_[arg].stride_bytes = strides; + } + void _unsafe_set_arg_data(const int64_t arg, void* data) { + operands_[arg].data = data; + } + + // Helper functions for custom device, custom device can get OperandInfo and + // NameVector in their side. 
+ const OperandInfo& operand(int arg = 0) const { + return operands_[arg]; + } + OperandInfo& operand(int arg = 0) { + return operands_[arg]; + } + NameVector& get_dim_names() { + return names_; + } + const NameVector& get_dim_names() const { + return names_; + } + + /// true if the stride computation can use 32-bit arithmetic. Used by GPU + /// kernels + bool can_use_32bit_indexing() const; + + /// An "iteratable" object that recursively splits this iterator into + /// sub-iterators that can use 32-bit indexing. + SplitUntil32Bit with_32bit_indexing() const; + + /// If the kernel should accumulate into the output. Only relevant for CUDA + /// reductions. + bool should_accumulate() const { + return accumulate_; + } + + /// Whether this iterator produces the actual output, + /// as opposed to something that will be accumulated further. Only relevant + /// for CUDA reductions. + bool is_final_output() const { + return final_output_; + } + + bool has_contiguous_first_dim() const { + if (ndim() == 0) { + return true; + } + + int num_tensors = ntensors(); + for (const auto i : c10::irange(num_tensors)) { + if (strides(i)[0] != element_size(i)) { + return false; + } + } + return true; + } + + void set_output_raw_strided( + int64_t output_idx, + IntArrayRef sizes, + IntArrayRef strides, + TensorOptions options, + DimnameList names) override; + +#define TORCH_DISALLOW_TEMPORARIES_IMPL(methodname, maybestatic) \ + maybestatic void methodname( \ + TensorBase&& out, const TensorBase& a, const TensorBase& b) = delete; \ + maybestatic void methodname( \ + const TensorBase& out, TensorBase&& a, const TensorBase& b) = delete; \ + maybestatic void methodname( \ + const TensorBase& out, const TensorBase& a, TensorBase&& b) = delete; \ + maybestatic void methodname( \ + TensorBase&& out, TensorBase&& a, const TensorBase& b) = delete; \ + maybestatic void methodname( \ + TensorBase&& out, const TensorBase& a, TensorBase&& b) = delete; \ + maybestatic void methodname( \ + const TensorBase& out, TensorBase&& a, TensorBase&& b) = delete; \ + maybestatic void methodname( \ + TensorBase&& out, TensorBase&& a, TensorBase&& b) = delete; + +#define TORCH_DISALLOW_TEMPORARIES(methodname) \ + TORCH_DISALLOW_TEMPORARIES_IMPL(methodname, ) + + void build_binary_float_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + void build_borrowing_binary_float_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_binary_float_op) + void build_binary_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + void build_borrowing_binary_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_binary_op) + void build_unary_float_op(const TensorBase& out, const TensorBase& a); + void build_borrowing_unary_float_op( + const TensorBase& out, + const TensorBase& a); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_unary_float_op) + void build_unary_op(const TensorBase& out, const TensorBase& a); + // Odd special case needed for pow. Has to borrow the output because + // it's a structured kernel, but the argument is potentially a copy. 
+ void build_output_borrowing_argument_owning_unary_op( + const TensorBase& out, + const TensorBase& a); + void build_borrowing_unary_op(const TensorBase& out, const TensorBase& a); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_unary_op) + void build_borrowing_unary_force_boolean_op( + const TensorBase& out, + const TensorBase& a); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_unary_force_boolean_op) + void build_comparison_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + void build_borrowing_comparison_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + TORCH_DISALLOW_TEMPORARIES(build_borrowing_comparison_op) + // Another special case: we need to own the second argument for comparison + // ops. + void build_borrowing_except_last_argument_comparison_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + void build_ternary_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b, + const TensorBase& c); + +#undef TORCH_DISALLOW_TEMPORARIES + protected: + // Mutable reference as it moves tensors out of TensorIteratorConfig + void populate_operands(TensorIteratorConfig&); + void mark_outputs(); + void mark_resize_outputs(const TensorIteratorConfig&); + void compute_mem_overlaps(const TensorIteratorConfig&); + void compute_shape(const TensorIteratorConfig&); + void compute_strides(const TensorIteratorConfig&); + void reorder_dimensions(); + void permute_dimensions(IntArrayRef perm); + void compute_types(const TensorIteratorConfig&); + ScalarType compute_common_dtype(); + void allocate_or_resize_outputs(); + bool fast_set_up(const TensorIteratorConfig&); + FastSetupType compute_fast_setup_type(const TensorIteratorConfig&); + void compute_names(const TensorIteratorConfig&); + void propagate_names_to_outputs(); + void coalesce_dimensions(); + + protected: + /// Records the "computation" shape of the output tensor. The computation + /// shape is different from the regular shape in a few ways: + /// + /// - The shape may be permuted (via permute_dimensions) so that we + /// process the dimensions in the most computationally efficient order + /// (rather than the logical order given to us by the users.) + /// - The shape may have adjacent dimensions collapsed (via + /// coalesce_dimensions) so that we minimize the number of + /// dimensions we have to explicitly iterate over. For example, + /// a pointwise operation on a contiguous tensor "computationally" + /// consists of only a single dimension. + /// + /// In other words, the computation shape is the output shape as it + /// actually matters for implementing the kernel, but not necessarily the + /// output shape that the user will see in the end. + /// + /// The lifecycle of mutations to shape_ in TensorIterator: + /// - declare_static_shape() sets an initial shape explicitly + /// provided by user, otherwise + /// - compute_shape() computes the true (non-computational) shape + /// specified by the user. + /// - reorder_dimensions() reorders dimensions to improve coalescing. + /// - coalesce_dimensions() then coalesces adjacent dimensions when + /// possible. + /// + /// The shape may also be further modified if we create sub-TensorIterators, + /// e.g., via narrow or select_all_keeping_dim. + DimVector shape_; + + /// Temporarily records the permutation computed by reorder_dimensions. + /// This permutation maps the computation output dimension (dim) to + /// the original true output dimension (perm_[dim]). 
It is used by + /// invert_perm to undo the permutation. After coalesce_dimensions is + /// called, the permutation is no longer valid (as, in general, there + /// is no permutation that will make computation dimensions to + /// output dimensions); methods that manipulate perm_ are obligated + /// to test that !has_coalesced_dimensions + DimVector perm_; + + /// Has coalesce_dimensions() (or any moral equivalent, e.g., fast_build()) + /// been called? This is SOLELY used to check validity of perm_. + bool has_coalesced_dimensions_ = false; + + /// Whether iteration must be fixed. This disables dimension permuting and + /// also changes how for_each divides work among threads. + bool enforce_linear_iteration_ = false; + + /// The index offsets into the original tensors for each dimension. + /// This is only non-zero when you narrow() a TensorIterator (e.g., + /// when you make sub-TensorIterators). + DimVector view_offsets_; + + /// The computed names of the output tensor. Computed by compute_names() + NameVector names_; + + /// The operands of the TensorIterator: both the inputs and outputs. The + /// outputs MUST come first in the operands_ list. There is always an + /// operand for each output of the TensorIterator, even if TensorIterator + /// will ultimately be responsible for allocating the output; in those + /// cases, tensor is simply undefined (and will be populated later + /// during build()). + /// + /// This list is initially populated prior to build(), but build() mutates + /// OperandInfo to populate more information. + SmallVector operands_; + + /// Number of outputs in operands_ (the length of the outputs prefix + /// in operands_). + int num_outputs_ = 0; + + /// Whether or not all operands have the same shape and are 1d+. Having all + /// the same shape affects whether or not the iterator is eligible for fast + /// setup. + bool all_ops_same_shape_ = false; + /// Whether or not all operands are 0d, this affects type promotion + bool all_ops_are_scalars_ = false; + + /// The "computation" dtype of TensorIterator, specifying what the dtype + /// we will do the internal computation in TensorIterator. Typically, + /// this matches the dtype of the output tensors, but not always! + ScalarType common_dtype_ = ScalarType::Undefined; + + /// This is currently defined as kCPU, or the device of the first non-CPU + /// tensor argument. See TensorIteratorBase::compute_types for details. 
+ Device common_device_ = kCPU; + + /// Set by split(), see should_accumulate() and is_final_output() + bool accumulate_ = false; + bool final_output_ = true; + + // From TensorIteratorConfig + bool is_reduction_ = false; + + /// Set by populate_operands(), says if we're handling meta tensors + bool is_meta_ = false; +}; + +struct TORCH_API TensorIterator final : public TensorIteratorBase { + TensorIterator() : TensorIteratorBase() {} + // Slicing is OK, TensorIterator guaranteed NOT to have any fields + TensorIterator(const TensorIteratorBase& iter) : TensorIteratorBase(iter) {} + +#define TORCH_DISALLOW_TEMPORARIES(methodname) \ + TORCH_DISALLOW_TEMPORARIES_IMPL(methodname, static) + + static TensorIterator binary_float_op( + TensorBase& out, + const TensorBase& a, + const TensorBase& b); + static TensorIterator binary_op( + TensorBase& out, + const TensorBase& a, + const TensorBase& b); + static TensorIterator borrowing_binary_op( + const TensorBase& out, + const TensorBase& a, + const TensorBase& b); + TORCH_DISALLOW_TEMPORARIES(borrowing_binary_op) + static TensorIterator comparison_op( + TensorBase& out, + const TensorBase& a, + const TensorBase& b); + static TensorIterator unary_op(TensorBase& out, const TensorBase& a); + static TensorIterator unary_float_op(TensorBase& out, const TensorBase& a); + static TensorIterator nullary_op(TensorBase& out); + static TensorIterator borrowing_nullary_op(const TensorBase& out); + static TensorIterator borrowing_nullary_op(TensorBase&& out) = delete; + static TensorIterator reduce_op(TensorBase& out, const TensorBase& a); + static TensorIterator reduce_op( + TensorBase& out1, + TensorBase& out2, + const TensorBase& a); +#undef TORCH_DISALLOW_TEMPORARIES +#undef TORCH_DISALLOW_TEMPORARIES_IMPL + + const Tensor& maybe_get_output(int64_t output_idx) override; + void set_output_raw_strided( + int64_t output_idx, + IntArrayRef sizes, + IntArrayRef strides, + TensorOptions options, + DimnameList names) override; +}; + +class TORCH_API TensorIteratorConfig final { + public: + friend struct TensorIteratorBase; + friend struct TensorIterator; + + TensorIteratorConfig() = default; + + C10_DISABLE_COPY_AND_ASSIGN(TensorIteratorConfig); + + /// Construction + // Stores input/output Tensors without incrementing the reference count. + // Important: the outputs have to be added before the inputs. + TensorIteratorConfig& add_output(const TensorBase& output) { + return add_borrowed_output(output); + } + TensorIteratorConfig& add_input(const TensorBase& input) { + return add_borrowed_input(input); + } + TensorIteratorConfig& add_const_input(const TensorBase& input) { + return add_borrowed_const_input(input); + } + + // Borrowing from temporaries is unlikely to go well. + TensorIteratorConfig& add_output(TensorBase&& output) = delete; + TensorIteratorConfig& add_input(TensorBase&& input) = delete; + TensorIteratorConfig& add_const_input(TensorBase&& input) = delete; + + // Stores input/output Tensors while incrementing the reference count. + // Note that add_{in,out}put are nearly always what you + // want, and the exception (adding an unnamed temporary) won't + // compile. + TensorIteratorConfig& add_owned_output(const TensorBase& output); + TensorIteratorConfig& add_owned_input(const TensorBase& input); + TensorIteratorConfig& add_owned_const_input(const TensorBase& input); + + // Advanced API: stores input/output Tensors without incrementing + // the reference count. 
The caller must ensure that these Tensors + // live at least as long as this TensorIteratorConfig and any + // TensorIteratorBase built from this TensorIteratorConfig. + // Important: the outputs have to be added before the inputs. + TensorIteratorConfig& add_borrowed_output(const TensorBase& output); + TensorIteratorConfig& add_borrowed_input(const TensorBase& input); + TensorIteratorConfig& add_borrowed_const_input(const TensorBase& input); + + // Borrowing from temporaries is unlikely to go well. + TensorIteratorConfig& add_borrowed_output(TensorBase&& output) = delete; + TensorIteratorConfig& add_borrowed_input(TensorBase&& input) = delete; + TensorIteratorConfig& add_borrowed_const_input(TensorBase&& input) = delete; + + // Sets the check_mem_overlap_ flag, which is true by default. + // If true, inputs are checked for partial overlap with the outputs and + // outputs are checked for internal overlap (e.g. broadcasted views). An error + // is raised if unacceptable overlap is detected. + // If you're migrating an existing operator to using TensorIterator, please + // consider if the previous implementation checked memory overlap. If it did + // not, and if the operator is idempotent (for example, Tensor.fill_(0)), then + // checking memory overlap is BC-breaking. Please don't check memory overlap + // in that case. + TensorIteratorConfig& set_check_mem_overlap(bool check_mem_overlap) { + check_mem_overlap_ = check_mem_overlap; + return *this; + } + + // Sets the check_all_same_dtype_ flag, which is true by default + // If true, checks that all inputs and defined outputs have the same dtype + // Setting either of promote_inputs_to_common_dtype_ + // or cast_common_dtype_to_outputs_ to true will set + // check_all_same_dtype_ to false. + TensorIteratorConfig& check_all_same_dtype(const bool _check_all_same_dtype) { + check_all_same_dtype_ = _check_all_same_dtype; + return *this; + } + + // Sets the check_all_same_device_ flag, which is true by default + // If true, all operands must be on the same device, with the possible + // exception of CPU scalars, which can be passed to some CUDA kernels + // as kernel arguments. + TensorIteratorConfig& check_all_same_device( + const bool _check_all_same_device) { + check_all_same_device_ = _check_all_same_device; + return *this; + } + + // Sets the enforce_safe_casting_to_output_ flag, which is false by default + // If true, the iterator's "common dtype" must be computable + // (see the [Common Dtype Computation] note) and + // canCast(common dtype, output dtype) must be true for all outputs. + TensorIteratorConfig& enforce_safe_casting_to_output( + const bool _enforce_safe_casting_to_output) { + enforce_safe_casting_to_output_ = _enforce_safe_casting_to_output; + return *this; + } + + // Sets the enforce_linear_iteration_ flag, which is false by default. + // If true, iteration goes in the same order as a C-contiguous tensor + // is layed out in memory. i.e. last dimension iterates fastest. + // + // This iteration order can be less efficient and may even prevent + // vectorization. So only use if the correctness of your kernel depends on it. 
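// ---------------------------------------------------------------------------
// Editor's annotation (illustrative sketch, not part of the vendored header).
// A hedged example of how the configuration flags described above compose
// when building an iterator for a custom binary op. The function name is
// illustrative only, and the flag choices are one plausible combination, not
// the one any particular ATen op uses:
#include <ATen/ATen.h>
#include <ATen/TensorIterator.h>

at::TensorIterator make_binary_iter(const at::Tensor& out,
                                    const at::Tensor& a,
                                    const at::Tensor& b) {
  // add_output/add_const_input borrow: out, a and b must outlive the
  // returned iterator (see the lifetime note above).
  return at::TensorIteratorConfig()
      .set_check_mem_overlap(true)           // default, shown for clarity
      .check_all_same_device(true)           // default
      .promote_inputs_to_common_dtype(true)  // also clears check_all_same_dtype_
      .cast_common_dtype_to_outputs(true)
      .enforce_safe_casting_to_output(true)
      .add_output(out)                       // outputs before inputs
      .add_const_input(a)
      .add_const_input(b)
      .build();
}
// ---------------------------------------------------------------------------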
+ TensorIteratorConfig& enforce_linear_iteration( + const bool _enforce_linear_iteration = true) { + enforce_linear_iteration_ = _enforce_linear_iteration; + return *this; + } + + // Sets the promote_inputs_to_common_dtype_ flag, which is false by default + // If true, the iterator's "common dtype" is always computed (see the + // [Common Dtype Computation] note) and, on the CPU, temporary copies of + // the inputs in the common dtype are passed as the actual inputs to + // the operation. + // Setting this flag to true sets check_all_same_dtype_ to false. + TensorIteratorConfig& promote_inputs_to_common_dtype( + const bool _promote_inputs_to_common_dtype) { + promote_inputs_to_common_dtype_ = _promote_inputs_to_common_dtype; + if (_promote_inputs_to_common_dtype) { + check_all_same_dtype_ = false; + } + return *this; + } + + // Sets the promote_integer_inputs_to_float_ flag, which is false by default + // NOTE: If set to true, the promote_inputs_to_common_dtype_ must also be + // true. If true, if the iterator's "common dtype" is an integral type + // (including bool) + // then it is changed to the default float scalar type. + TensorIteratorConfig& promote_integer_inputs_to_float( + const bool _promote_integer_inputs_to_float) { + promote_integer_inputs_to_float_ = _promote_integer_inputs_to_float; + TORCH_INTERNAL_ASSERT( + !promote_integer_inputs_to_float_ || promote_inputs_to_common_dtype_); + return *this; + } + + TensorIteratorConfig& is_reduction(const bool _is_reduction) { + is_reduction_ = _is_reduction; + return *this; + } + + TensorIteratorConfig& allow_cpu_scalars(const bool _allow_cpu_scalars) { + allow_cpu_scalars_ = _allow_cpu_scalars; + return *this; + } + + // Sets the cast_common_dtype_to_outputs_ flag, which is false by default + // If true, the iterator's "common dtype" must be computatable + // (see the [Common Dtype Computation] note) and, on the CPU, temporary + // copies of the outputs are passed as the actual output to the operation. + // These temporaries are then copied to the original outputs after + // the operation is performed (see cast_outputs()). + // Setting this flag to true sets check_all_same_dtype_ to false. + TensorIteratorConfig& cast_common_dtype_to_outputs( + const bool _cast_common_dtype_to_outputs) { + cast_common_dtype_to_outputs_ = _cast_common_dtype_to_outputs; + if (_cast_common_dtype_to_outputs) { + check_all_same_dtype_ = false; + } + return *this; + } + + TensorIteratorConfig& resize_outputs(bool resize_outputs) { + resize_outputs_ = resize_outputs; + return *this; + } + + // Bypass output dtype/device computation and fix the dtype/device as + // specified here. 
+ TensorIteratorConfig& declare_static_dtype_and_device( + ScalarType dtype, + Device device); + TensorIteratorConfig& declare_static_dtype(ScalarType dtype); + TensorIteratorConfig& declare_static_device(Device device); + TensorIteratorConfig& declare_static_shape(IntArrayRef shape); + TensorIteratorConfig& declare_static_shape( + IntArrayRef shape, + IntArrayRef squash_dims); + + // It would be better if this was && qualified, but this would be at the cost + // of a lot of boilerplate above + TensorIterator build() { + TensorIterator iter; + iter.build(*this); + return iter; + } + + private: + bool is_tensor_const(size_t idx); + + SmallVector, 4> tensors_; + int num_outputs_ = 0; + int num_inputs_ = 0; + + std::optional static_shape_ = std::nullopt; + std::optional static_dtype_ = std::nullopt; + std::optional static_device_ = std::nullopt; + bool check_mem_overlap_ = true; + bool allow_cpu_scalars_ = false; + bool is_reduction_ = false; + bool resize_outputs_ = true; + bool check_all_same_dtype_ = true; + bool check_all_same_device_ = true; + bool enforce_safe_casting_to_output_ = false; + bool enforce_linear_iteration_ = false; + bool promote_inputs_to_common_dtype_ = false; + bool promote_integer_inputs_to_float_ = false; + bool cast_common_dtype_to_outputs_ = false; + + SmallVector const_tensor_indices_; +}; + +/// A container-like struct that acts as if it contains splits of a +/// TensorIterator that can use 32-bit indexing. Taken together the splits cover +/// the original TensorIterator. +struct TORCH_API SplitUntil32Bit { + struct TORCH_API iterator { + iterator() = default; + iterator(const TensorIteratorBase& iter); + iterator(iterator&&) = default; + + // Guaranteed to be a TensorIterator proper! + TensorIterator& operator*() const; + iterator& operator++(); + bool operator==(const iterator& other) const { + // two iterators are equal if they are the same object or they're both + // empty + return this == &other || (vec.empty() && other.vec.empty()); + } + // needed for C++11 range-based for loop + bool operator!=(const iterator& other) const { + return !(*this == other); + } + + /// stack of TensorIterators to be split + std::vector> vec; + }; + + SplitUntil32Bit(const TensorIteratorBase& iter) : iter(iter) {} + + iterator begin() const; + iterator end() const; + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const TensorIteratorBase& iter; +}; + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorMeta.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorMeta.h new file mode 100644 index 0000000000000000000000000000000000000000..7576d1fd1fdd56dc4651fc79d3af0ed0d62a1baf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorMeta.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include +#include + +namespace at { + +class Tensor; + +namespace impl { + +// Use this to define the prototype for a meta function. There are two +// versions; one that takes one argument (just the operator name), or FUNC2 +// variant that takes two arguments (operator name and overload name). +// +// Example usage: +// +// TORCH_META_FUNC2(add, Tensor) ( +// const Tensor& self, const Tensor& other +// ) { +// ... compute sizes and options ... 
+// set_output(sizes, options); +// } +// +#define TORCH_META_FUNC(name) void structured_##name::meta +#define TORCH_META_FUNC2(name, overload) \ + void structured_##name##_##overload::meta + +// These are versions of TORCH_META_FUNC(2) that include a precompute_out struct +// as a return value. They should be used when the kernel in question has +// precomputed values declared in native_functions.yaml and the corresponding +// implementation should return an instance of the aforementioned struct. +#define TORCH_PRECOMPUTE_META_FUNC(name) \ + structured_##name::meta_return_ty structured_##name::meta +#define TORCH_PRECOMPUTE_META_FUNC2(name, overload) \ + structured_##name##_##overload::meta_return_ty \ + structured_##name##_##overload::meta + +// Use this to create a precompute struct in a meta function. +#define TORCH_PRECOMPUTE_STRUCT(name) structured_##name::precompute_out<> +#define TORCH_PRECOMPUTE_STRUCT2(name, overload) \ + structured_##name##_##overload::precompute_out<> + +// Use this to define the prototype for an implementation. This takes only +// one argument, which is the name of the dispatch key entry you're +// implementing. +// +// Example usage: +// +// TORCH_IMPL_FUNC(add_cpu) ( +// Tensor& result, const Tensor& self, const Tensor& other +// ) { +// ... do the actual implementation ... +// } +// +#define TORCH_IMPL_FUNC(name) void structured_##name::impl + +// Base class for all structured kernel classes. The set_output virtual +// method is varied depending whether or not the operator is +// functional/out/inplace, and could also be specialized for CPU/CUDA/etc +// (although presently it isn't). +// +// A notable subclass of this interface is TensorIteratorBase. +struct TORCH_API MetaBase { + MetaBase() = default; + MetaBase(const MetaBase&) = default; + MetaBase& operator=(const MetaBase&) = default; + MetaBase(MetaBase&&) noexcept = default; + MetaBase& operator=(MetaBase&&) noexcept = default; + virtual const Tensor& maybe_get_output(int64_t output_idx) = 0; + + // Note: [set_output_*] + // See: https://github.com/pytorch/pytorch/issues/69813 + // Whenever defining the output properties in the META function of a + // structured kernel (what was usually done with `set_output`), use one of + // these 3 variants, instead. In order to decide which variant to use, check + // the following decision tree: + // + // - Can the kernel you are going to implement support output tensors + // with arbitrary strides? + // | + // -- YES: `set_output_raw_strided` + // | + // -- NO: Should the output tensor strides be contiguous? + // | + // -- YES: `set_output_contiguous` + // | + // -- NO: `set_output_strided` + // + // Use this function whenever the kernel requires specific strides for the + // output. If `strides` does not match the given output strides, proxy outputs + // will be created and passed to the IMPL function. + virtual void set_output_strided( + int64_t output_idx [[maybe_unused]], + IntArrayRef sizes [[maybe_unused]], + IntArrayRef strides [[maybe_unused]], + TensorOptions options [[maybe_unused]], + DimnameList names [[maybe_unused]] = {}) { + TORCH_INTERNAL_ASSERT(false, "set_output_strided not implemented."); + } + + // Use this function whenever the kernel knows how to handle arbitrary strided + // outputs. This function has the same behavior as the old `set_output`: it + // will only re-stride if the given output was resized. 
+ virtual void set_output_raw_strided( + int64_t output_idx [[maybe_unused]], + IntArrayRef sizes [[maybe_unused]], + IntArrayRef strides_hint [[maybe_unused]], + TensorOptions options [[maybe_unused]], + DimnameList names [[maybe_unused]] = {}) { + TORCH_INTERNAL_ASSERT(false, "set_output_strided not implemented."); + } + + // Use this function if the kernel requires contiguous strides. + // Alias for `set_output_strided`, but with contiguous strides. + void set_output_contiguous( + int64_t output_idx, + IntArrayRef sizes, + TensorOptions options, + DimnameList names = {}) { + auto strides = c10::contiguous_strides(sizes); + set_output_strided(output_idx, sizes, strides, options, names); + } + + // Returns a reference to an undefined tensor if there is no presupplied + // output + const Tensor& maybe_get_output() { + return maybe_get_output(0); + } + virtual ~MetaBase() = default; +}; + +} // namespace impl + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorNames.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorNames.h new file mode 100644 index 0000000000000000000000000000000000000000..616efc14d2599d7f1a9f73f04fdf960e05bfcf2e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorNames.h @@ -0,0 +1,75 @@ +#pragma once + +#include + +namespace at::namedinference { + +// TensorName and TensorNames are wrappers around Dimname and DimnameList +// that contain helper functions to make writing name inference rules easier. +// +// A TensorName represents a Dimname associated with some DimnameList (from a +// Tensor). This encapsulates all the information that is needed to check if +// names *match* and to *unify* names. +// +// Definition: Two names in two tensors *match* if they are equal, or if at +// least one of them is a wildcard that can be *refined* to the other name. +// +// Definition: unify(name, other) fails if the names do not match. Otherwise, +// it returns the most refined of name and other. +// +// Here is an example of checking if two names match. +// tensor: Tensor[A, None] +// other: Tensor[A] +// +// Let's say we wish to check if tensor.names[-1] matches other.names[-1]. +// None (in tensor) cannot match A (in other) because if the None were refined +// to A, `tensor` would have duplicate names [A, A]. Therefore we need to check +// tensor.names [A, None] for the existence of A. +struct TORCH_API TensorName { + explicit TensorName(ArrayRef origin, int origin_idx) + : origin_(origin), + name_(origin[maybe_wrap_dim( + origin_idx, + static_cast(origin.size()))]), + origin_idx_(origin_idx) {} + + // op_name is only used for error reporting. + const TensorName& unify(const TensorName& other, const char* op_name) const; + Dimname toDimname() const; + + private: + ArrayRef origin_; + Dimname name_; + int origin_idx_; // A named tensor can have at most 64 dims. + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const TensorName& tensorname); +}; + +using TensorNameVec = SmallVector; + +struct TORCH_API TensorNames { + explicit TensorNames(ArrayRef names); + + // Create TensorNames from names[start:end]. Each individual TensorName stores + // `names`, NOT names[start:end], because the original tensor's names are + // `names`. + explicit TensorNames(ArrayRef names, int64_t start, int64_t end); + + // op_name is only used for error reporting. 
+ TensorNames& unifyFromRightInplace( + const TensorNames& other, + const char* op_name = "unify"); + void checkUnique(const char* op_name) const; + + void append(TensorName name); + std::vector toDimnameVec() const; + + private: + explicit TensorNames(TensorNameVec&& names) : names_(std::move(names)){}; + + TensorNameVec names_; +}; + +} // namespace at::namedinference diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorOperators.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorOperators.h new file mode 100644 index 0000000000000000000000000000000000000000..383c5f0f74517e8f26ee8a8a8132f703252cfda1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TensorOperators.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#endif + +namespace at { + +#define AT_FORALL_BINARY_OPS(_) \ + _(+, x.add(y), y.add(x)) \ + _(*, x.mul(y), y.mul(x)) \ + _(-, \ + x.sub(y), \ + ::at::empty_like(y, at::MemoryFormat::Preserve).fill_(x).sub_(y)) \ + _(/, \ + x.div(y), \ + ::at::empty_like(y, at::MemoryFormat::Preserve).fill_(x).div_(y)) \ + _(%, \ + x.remainder(y), \ + ::at::empty_like(y, at::MemoryFormat::Preserve).fill_(x).remainder_(y)) \ + _(&, x.bitwise_and(y), y.bitwise_and(x)) \ + _(|, x.bitwise_or(y), y.bitwise_or(x)) \ + _(^, x.bitwise_xor(y), y.bitwise_xor(x)) \ + _(<, x.lt(y), y.gt(x)) \ + _(<=, x.le(y), y.ge(x)) \ + _(>, x.gt(y), y.lt(x)) \ + _(>=, x.ge(y), y.le(x)) \ + _(==, x.eq(y), y.eq(x)) \ + _(!=, x.ne(y), y.ne(x)) + +#define DEFINE_OPERATOR(op, body, reverse_scalar_body) \ + inline Tensor operator op(const Tensor& x, const Tensor& y) { \ + return body; \ + } \ + inline Tensor operator op(const Tensor& x, const Scalar& y) { \ + return body; \ + } \ + inline Tensor operator op(const Scalar& x, const Tensor& y) { \ + return reverse_scalar_body; \ + } + +AT_FORALL_BINARY_OPS(DEFINE_OPERATOR) +#undef DEFINE_OPERATOR +#undef AT_FORALL_BINARY_OPS + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/TracerMode.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/TracerMode.h new file mode 100644 index 0000000000000000000000000000000000000000..8ba62640fe65056431cb9c3b815a0e8e42f50f68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/TracerMode.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include + +// NOTE [Tracing Mode Switches] +// +// Historically, tracing function was controlled by two switches: +// +// - `AutoDispatchBelowADInplaceOrView` guard +// +// Tracing function used to be script-generated inside `VariableType_*.cpp` +// kernels, sharing the same `Autograd` dispatch key with autograd function. +// Therefore, before tracing function was moved out of VariableType, +// `AutoDispatchBelowADInplaceOrView` guard can also disable tracing as a +// side effect of disabling `Autograd` dispatching. +// +// - `setTracingState()` API in `torch/csrc/jit/frontend/tracer.h` +// +// It stores tracing data in a `TracingState` object in TLS. If the +// `TracingState` object in TLS is `null`, then tracing is paused. +// +// The `TracingState` object is created in `tracer::trace()` - the main +// entrance of tracing function. It's temporarily set to `null` inside +// generated VariableType (now TraceType) to bypass tracing for intermediate +// ops (ops being called by other ops). After the intermediate op call +// finishes it's set back to the original `TracingState` object. 
+// +// The `TracingState` obect in TLS can also be read/written via its Python +// binding in `python_tracer.cpp`, and `get/setTracingState()` C++ APIs, +// which are also exposed as `TORCH_API`. +// +// Two new switches were introduced since tracing function was moved out of +// VariableType: +// +// - `tracer::impl::set_dispatch_enabled()` API +// +// Unlike the special `Autograd` dispatch key which is included in dispatch +// key set by default, `Tracer` dispatch key is off by default. The +// dispatching switch can be toggled via this new API. +// +// - `tracer::impl::NoTracerDispatchMode` guard +// +// It's used to cover the old semantics of `AutoDispatchBelowADInplaceOrView` +// after tracing was moved out of VariableType. +// +// Before tracing function was moved out of VariableType, tracing was enabled +// when the following conditions are satisfied: +// +// 1) `TracingState` object in TLS != null; +// - Either inside the execution scope of `tracer::trace()`, or +// - Eagerly called `setTracingState()` with non-null object. +// 2) Not inside `AutoDispatchBelowADInplaceOrView` scope; +// +// After: +// +// 1) `TracingState` object in TLS != null; +// 2) Has called `tracer::impl::set_dispatch_enabled(true)`; +// 3) Not inside `tracer::impl::NonDispatchGuard` scope; +// +// [TODOs] +// +// - `setTracingState()` v.s. `tracer::impl::set_dispatch_enabled()` +// +// Currently `set_dispatch_enabled()` is set/unset inside `setTracingState()` +// to keep the semantics exactly the same as before - it's confusing to keep +// both switches, though. We should consider simplifying/limiting the exposed +// `setTracingState()` Python/C++ APIs (and other APIs calling it) so that +// these two can be unified. +// +// - `AutoDispatchBelowADInplaceOrView` v.s. +// `tracer::impl::NoTracerDispatchMode` +// +// We don't need to always set both guards together to keep semantics +// unchanged. For the follow use cases of `AutoDispatchBelowADInplaceOrView` +// we don't need set the new tracer guard: +// +// * Script-generated VariableType kernels. The guard is not necessary as +// tracing is already disabled explicitly by `setTracingState(null)` in +// generated TraceType kernels - we could keep it as is or use the new guard +// instead. +// +// * Custom ops. Will be handled by fallback kernel for `Tracer`. +// +// * Functions that are not likely to be called in tracing context (no python +// binding / not an operator), e.g.: all mobile forward() wrappers, test +// binaries, and etc. +// +// * Where new threads are spawned, e.g.: ATen/native/ConvolutionMM2d.cpp. +// It's not necessary as tracing is off by default. +// +// For the rest of cases we might need have both: +// +// * Functions that might be reachable from eager mode python (especially +// factory methods), e.g.: +// `internal_new_from_data()` in `torch/csrc/utils/tensor_new.cpp`. +// Without the new guard it will add `aten::empty` to the traced graph. +// +// * Some manually maintained functions, e.g.: +// `torch/csrc/autograd/VariableTypeManual.cpp`. +// Set the new guard if it's not obvious whether `setTracingState(null)` +// has been called before it reaches the `AutoDispatchBelowADInplaceOrView` +// guard. +// +// We might need tweak the usage of the new guard to optimize/fix things. +// It should only affect the correctness of tracing function, because the +// guard is essentially no-op when the master `setTracingState()` switch is +// off. 
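// ---------------------------------------------------------------------------
// Editor's annotation (illustrative sketch, not part of the vendored header).
// The note above boils down to: tracing dispatch happens only when the Tracer
// key is included via set_dispatch_enabled(true) and not excluded by a
// NoTracerDispatchMode guard. A hedged sketch of hiding a region of work from
// the tracer; `run_untraced` is a hypothetical helper:
#include <ATen/TracerMode.h>
#include <functional>

void run_untraced(const std::function<void()>& fn) {
  // Excludes DispatchKey::Tracer for the current thread while in scope.
  at::tracer::impl::NoTracerDispatchMode no_tracer_guard;
  fn(); // ops dispatched here skip the Tracer kernels
}
// ---------------------------------------------------------------------------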
+ +// TODO: move this from `at::` to `jit::torch::` after +// `aten/src/ATen/cpp_custom_type_hack.h` is removed. + +namespace at::tracer::impl { + +inline bool is_dispatch_enabled() { + return c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Tracer) && + !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer); +} + +inline void set_dispatch_enabled(bool enabled) { + TORCH_INTERNAL_ASSERT( + !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer), + "Cannot enable tracing within the scope of NoTracerDispatchMode!"); + c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Tracer, enabled); +} + +struct NoTracerDispatchMode { + c10::impl::ExcludeDispatchKeyGuard guard_{at::DispatchKey::Tracer}; +}; + +} // namespace at::tracer::impl diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..8b1ad3026cd046f21257778a9bba646a36f179cc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h @@ -0,0 +1,156 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace at { + +// if dim_post_expr is 0 and wrap_scalar is true, then dim must be in the +// range [-1, 0]. This is a special case for scalar tensors and manifests in +// e.g. torch.sum(scalar_tensor, 0) Otherwise, dim should be in the range +// [-dim_post_expr, dim_post_expr-1]. +using c10::maybe_wrap_dim; + +inline int64_t maybe_wrap_dim(int64_t dim, TensorImpl* tensor) { + return maybe_wrap_dim(dim, tensor->dim()); +} + +inline int64_t maybe_wrap_dim(int64_t dim, TensorList tensors) { + if (tensors.empty()) { + // can't wrap empty TensorList; rely on underlying implementation to throw + // error if necessary. + return dim; + } + return maybe_wrap_dim(dim, tensors[0].dim()); +} + +inline int64_t maybe_wrap_dim( + int64_t dim, + const std::vector>& tensor_sizes) { + if (tensor_sizes.empty()) { + // can't wrap empty list; rely on underlying implementation to throw error + // if necessary + return dim; + } + return maybe_wrap_dim(dim, tensor_sizes[0].size()); +} + +// Given an array of dimensions `dims` of length `ndims`, this function "Wraps" +// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be +// specified using negative indices. +// +// Additionally, if `wrap_scalar` is true then scalar tensors with rank 0, will +// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised for +// dimensions not in the range [-dim_post_expr, dim_post_expr). +inline void maybe_wrap_dims_n( + int64_t* dims, + int64_t ndims, + int64_t dim_post_expr, + bool wrap_scalars = true) { + if (dim_post_expr <= 0) { + if (wrap_scalars) { + dim_post_expr = 1; // this will make range [-1, 0] + } else { + TORCH_CHECK_INDEX( + ndims == 0, + "Dimension specified as ", + dims[0], + " but tensor has no dimensions"); + return; + } + } + int64_t min = -dim_post_expr; + int64_t max = dim_post_expr - 1; + for (const auto i : c10::irange(ndims)) { + auto& dim = dims[i]; + if (dim < min || dim > max) { + TORCH_CHECK_INDEX( + false, + "Dimension out of range (expected to be in range of [", + min, + ", ", + max, + "], but got ", + dim, + ")"); + } + if (dim < 0) + dim += dim_post_expr; + } +} + +// Given a contiguous container of dimensions `dims`, this function "Wraps" +// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be +// specified using negative indices. 
+// +// Additionally, if `wrap_scalar` is true then scalar tensors with rank 0, will +// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised for +// dimensions not in the range [-dim_post_expr, dim_post_expr). +template +inline void maybe_wrap_dims( + Container& dims, + int64_t dim_post_expr, + bool wrap_scalars = true) { + return maybe_wrap_dims_n( + dims.data(), dims.size(), dim_post_expr, wrap_scalars); +} + +// previously, size [0] tensors were the only possible empty tensors; thus, it +// wasn't possible to cat empty tensors unless all the other tensors were +// 1-dimensional, so we allowed these tensors to be "skipped" (both for wrap +// dimension behavior and dimension size checking). We maintain this behavior +// for backwards compatibility, but only for this specific size (i.e. other +// empty sizes are not skipped). +inline int64_t legacy_cat_wrap_dim( + int64_t dim, + const std::vector>& tensor_sizes) { + for (auto& sizes : tensor_sizes) { + if (sizes.size() == 1 && sizes[0] == 0) { + continue; + } + return maybe_wrap_dim(dim, static_cast(sizes.size())); + } + return dim; +} + +inline int64_t legacy_cat_wrap_dim_symint( + int64_t dim, + const std::vector>& tensor_sizes) { + for (auto& sizes : tensor_sizes) { + if (sizes.size() == 1) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[0].sym_eq(0))) { + continue; + } + } + return maybe_wrap_dim(dim, static_cast(sizes.size())); + } + return dim; +} + +inline int64_t legacy_cat_wrap_dim( + int64_t dim, + const MaterializedITensorListRef& tensors) { + for (const Tensor& tensor : tensors) { + if (tensor.dim() == 1) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(tensor.sym_sizes()[0].sym_eq(0))) { + continue; + } + } + return maybe_wrap_dim(dim, tensor.dim()); + } + return dim; +} + +// wrap negative dims in a vector +inline void wrap_all_dims( + std::vector& dims_to_wrap, + int64_t tensor_total_dims) { + for (const auto i : c10::irange(dims_to_wrap.size())) { + dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims); + } +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtilsMulti.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtilsMulti.h new file mode 100644 index 0000000000000000000000000000000000000000..ab86f5d71e4f06e3bdfb18a29d636c0e93469b2d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtilsMulti.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace at { + +// This is in an extra file to work around strange interaction of +// bitset on Windows with operator overloading + +constexpr size_t dim_bitset_size = 64; + +inline std::bitset dim_list_to_bitset( + OptionalIntArrayRef opt_dims, + size_t ndims) { + TORCH_CHECK( + ndims <= dim_bitset_size, + "only tensors with up to ", + dim_bitset_size, + " dims are supported"); + std::bitset seen; + if (opt_dims.has_value()) { + auto dims = opt_dims.value(); + for (const auto i : c10::irange(dims.size())) { + size_t dim = maybe_wrap_dim(dims[i], static_cast(ndims)); + TORCH_CHECK( + !seen[dim], + "dim ", + dim, + " appears multiple times in the list of dims"); + seen[dim] = true; + } + } else { + for (size_t dim = 0; dim < ndims; dim++) { + seen[dim] = true; + } + } + return seen; +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/autocast_mode.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/autocast_mode.h new file mode 100644 index 
0000000000000000000000000000000000000000..95f1dd2ca0c009ed92b55998fcdf8acc8f5f711c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/autocast_mode.h @@ -0,0 +1,927 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace at::autocast { + +TORCH_API bool is_autocast_enabled(at::DeviceType device_type); +TORCH_API void set_autocast_enabled(at::DeviceType device_type, bool enabled); +TORCH_API at::ScalarType get_autocast_dtype(at::DeviceType device_type); +TORCH_API void set_autocast_dtype( + at::DeviceType device_type, + at::ScalarType dtype); +TORCH_API void clear_cache(); +TORCH_API int increment_nesting(); +TORCH_API int decrement_nesting(); +TORCH_API bool is_autocast_cache_enabled(); +TORCH_API void set_autocast_cache_enabled(bool enabled); + +// deprecated CUDA-specific autocast APIs +C10_DEPRECATED_MESSAGE( + "at::autocast::is_enabled() is deprecated. Please use at::autocast::is_autocast_enabled(at::kCUDA) instead.") +TORCH_API inline bool is_enabled() { + TORCH_WARN_DEPRECATION( + "at::autocast::", + __func__, + "() is deprecated. Please use at::autocast::is_autocast_enabled(at::kCUDA) instead.") + return is_autocast_enabled(at::kCUDA); +} +C10_DEPRECATED_MESSAGE( + "at::autocast::set_enabled(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(at::kCUDA, enabled) instead.") +TORCH_API inline void set_enabled(bool enabled) { + TORCH_WARN_DEPRECATION( + "at::autocast::", + __func__, + "(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(at::kCUDA, enabled) instead.") + set_autocast_enabled(at::kCUDA, enabled); +} +C10_DEPRECATED_MESSAGE( + "at::autocast::get_autocast_gpu_dtype() is deprecated. Please use at::autocast::get_autocast_dtype(at::kCUDA) instead.") +TORCH_API inline at::ScalarType get_autocast_gpu_dtype() { + TORCH_WARN_DEPRECATION( + "at::autocast::", + __func__, + "() is deprecated. Please use at::autocast::get_autocast_dtype(at::kCUDA) instead.") + return get_autocast_dtype(at::kCUDA); +} +C10_DEPRECATED_MESSAGE( + "at::autocast::set_autocast_gpu_dtype(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(at::kCUDA, dtype) instead.") +TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) { + TORCH_WARN_DEPRECATION( + "at::autocast::", + __func__, + "(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(at::kCUDA, dtype) instead.") + set_autocast_dtype(at::kCUDA, dtype); +} + +#define DECLARE_DEPRECATED_AUTOCAST_APIS(name, device_type) \ + C10_DEPRECATED_MESSAGE( \ + "at::autocast::is_" #name \ + "_enabled() is deprecated. Please use at::autocast::is_autocast_enabled(" #device_type \ + ") instead.") \ + TORCH_API inline bool is_##name##_enabled() { \ + TORCH_WARN_DEPRECATION( \ + "at::autocast::", \ + __func__, \ + "() is deprecated. Please use at::autocast::is_autocast_enabled(" #device_type \ + ") instead.") \ + return is_autocast_enabled(device_type); \ + } \ + \ + C10_DEPRECATED_MESSAGE( \ + "at::autocast::set_" #name \ + "_enabled(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(" #device_type \ + ", enabled) instead.") \ + TORCH_API inline void set_##name##_enabled(bool enabled) { \ + TORCH_WARN_DEPRECATION( \ + "at::autocast::", \ + __func__, \ + "(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(" #device_type \ + ", enabled) instead.") \ + set_autocast_enabled(device_type, enabled); \ + } \ + \ + C10_DEPRECATED_MESSAGE( \ + "at::autocast::get_autocast_" #name \ + "_dtype() is deprecated. 
Please use at::autocast::get_autocast_dtype(" #device_type \ + ") instead.") \ + TORCH_API inline at::ScalarType get_autocast_##name##_dtype() { \ + TORCH_WARN_DEPRECATION( \ + "at::autocast::", \ + __func__, \ + "() is deprecated. Please at::autocast::get_autocast_dtype(" #device_type \ + ") instead.") \ + return get_autocast_dtype(device_type); \ + } \ + \ + C10_DEPRECATED_MESSAGE( \ + "at::autocast::set_autocast_" #name \ + "_dtype(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(" #device_type \ + ", dtype) instead.") \ + TORCH_API inline void set_autocast_##name##_dtype(at::ScalarType dtype) { \ + TORCH_WARN_DEPRECATION( \ + "at::autocast::", \ + __func__, \ + "(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(" #device_type \ + ", dtype) instead.") \ + set_autocast_dtype(device_type, dtype); \ + } + +#define AT_FORALL_DEPRECATED_AUTOCAST_BAKCNEDS(_) \ + _(cpu, at::kCPU) \ + _(xpu, at::kXPU) \ + _(xla, at::kXLA) \ + _(hpu, at::kHPU) \ + _(ipu, at::kIPU) \ + _(privateuseone, at::kPrivateUse1) + +// deprecated other backend specific autocast APIs +AT_FORALL_DEPRECATED_AUTOCAST_BAKCNEDS(DECLARE_DEPRECATED_AUTOCAST_APIS) + +namespace { +inline bool is_autocast_eligible( + const Tensor& tensor, + c10::DeviceType device_type) { + switch (device_type) { + case c10::DeviceType::CUDA: + return (tensor.is_cuda() || tensor.is_xla()) && + tensor.is_floating_point(); + case c10::DeviceType::CPU: + return (tensor.is_cpu() || tensor.is_mkldnn()) && + tensor.is_floating_point(); + case c10::DeviceType::XPU: + return tensor.is_xpu() && tensor.is_floating_point(); + case c10::DeviceType::IPU: + return tensor.is_ipu() && tensor.is_floating_point(); + case c10::DeviceType::HPU: + return tensor.is_hpu() && tensor.is_floating_point(); + case c10::DeviceType::XLA: + return tensor.is_xla() && tensor.is_floating_point(); + case c10::DeviceType::PrivateUse1: + return tensor.is_privateuseone() && tensor.is_floating_point(); + case c10::DeviceType::MPS: + return tensor.is_mps() && tensor.is_floating_point(); + default: + return false; + } +} +} // namespace + +inline DispatchKey get_autocast_dispatch_key_from_device_type( + c10::DeviceType device_type) { + switch (device_type) { + case c10::DeviceType::CUDA: + return DispatchKey::Autocast; + case c10::DeviceType::CPU: + return DispatchKey::AutocastCPU; + case c10::DeviceType::XPU: + return DispatchKey::AutocastXPU; + case c10::DeviceType::IPU: + return DispatchKey::AutocastIPU; + case c10::DeviceType::HPU: + return DispatchKey::AutocastHPU; + case c10::DeviceType::XLA: + return DispatchKey::AutocastXLA; + case c10::DeviceType::PrivateUse1: + return DispatchKey::AutocastPrivateUse1; + case c10::DeviceType::MPS: + return DispatchKey::AutocastMPS; + default: + throw std::runtime_error( + "unknown device type for autocast in get_autocast_dispatch_key_from_device_type"); + } +} + +inline bool is_autocast_available(c10::DeviceType device_type) { + if (device_type == at::kCPU || device_type == at::kCUDA || + device_type == at::kXPU || device_type == at::kIPU || + device_type == at::kHPU || device_type == at::kXLA || + device_type == at::kPrivateUse1 || device_type == at::kMPS) { + return true; + } else { + return false; + } +} + +inline at::ScalarType get_lower_precision_fp_from_device_type( + c10::DeviceType device_type) { + if (is_autocast_available(device_type)) { + return get_autocast_dtype(device_type); + } else { + throw std::runtime_error( + "unknown device type for autocast in get_lower_precision_fp_from_device_type"); + } +} + 
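// A minimal usage sketch of the helpers defined above (illustrative only;
// `example_lower_precision_dtype` is a hypothetical caller-side helper, not an
// upstream API). The usual pattern is to check availability and the enabled
// flag before asking for the lower-precision dtype (at::kHalf by default on
// CUDA, at::kBFloat16 by default on CPU).
inline std::optional<at::ScalarType> example_lower_precision_dtype(
    c10::DeviceType device_type) {
  if (is_autocast_available(device_type) && is_autocast_enabled(device_type)) {
    return get_lower_precision_fp_from_device_type(device_type);
  }
  return std::nullopt; // autocast unsupported on this device, or currently disabled
}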
+/******************************************************************** +Logic to extract the promote type from any Tensor or TensorList args. +********************************************************************/ + +// Overload to catch Tensor args. +// If nextArg is floating-point, compare its scalar_type with our +// current best guess for the promote type, and update if necessary. +inline at::ScalarType prioritize( + at::ScalarType current, + const Tensor& nextArg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + if (current == at::kDouble) { + AT_ERROR("promote type is double in at::autocast::prioritize"); + return current; + } + at::ScalarType lower_precision_fp = + get_lower_precision_fp_from_device_type(device_type); + if (is_autocast_eligible(nextArg, device_type)) { + auto next = nextArg.scalar_type(); + if (next == at::kDouble) { + return current; // ignores double tensors + } else if (current == at::kFloat || next == at::kFloat) { + return at::kFloat; // prioritizes float over lower_precision_fp + } else if (current == lower_precision_fp && next == lower_precision_fp) { + return lower_precision_fp; + } else { + AT_ERROR("Unexpected floating ScalarType in at::autocast::prioritize"); + return current; + } + } else { + return current; + } +} + +// Overload to catch TensorList args (for e.g. cat, stack). +// Reuses the overload above to process each Tensor in the list. +inline at::ScalarType prioritize( + at::ScalarType current, + const TensorList& list, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + for (const auto& tensor : list) { + current = prioritize(current, tensor, device_type); + } + return current; +} + +inline at::ScalarType prioritize( + at::ScalarType current, + const ITensorListRef& list, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + for (const auto& tensor : list) { + current = prioritize(current, tensor, device_type); + } + return current; +} + +// Template to catch non-Tensor args (no-op that returns current best guess) +template +inline at::ScalarType prioritize( + at::ScalarType current, + T nextArg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + return current; +} + +// Overload for the tail case. +inline at::ScalarType promote_type( + at::ScalarType current, + c10::DeviceType device_type) { + return current; +} + +// Unpack args and determine if incoming lower_precision_fp tensors need to be +// promoted to float32. Non-Tensor arguments are ignored. +template +inline at::ScalarType promote_type( + at::ScalarType current, + c10::DeviceType device_type, + Arg0 arg0, + Args... args) { + auto new_current = prioritize(current, arg0, device_type); + return promote_type(new_current, device_type, args...); +} + +/**************************************************** +Logic to apply cached casting to any Tensor argument. 
+****************************************************/ +inline bool is_eligible( + const Tensor& arg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + return ( + arg.defined() && is_autocast_eligible(arg, device_type) && + (arg.scalar_type() != at::kDouble)); +} + +// Overload to catch Tensor args +TORCH_API Tensor cached_cast( + at::ScalarType to_type, + const Tensor& arg, + c10::DeviceType device_type = c10::DeviceType::CUDA); + +// Overload to process std::optional +inline std::optional cached_cast( + at::ScalarType to_type, + const std::optional& arg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + if (arg.has_value()) { + return cached_cast(to_type, *arg, device_type); + } else { + return std::nullopt; + } +} + +// Overload to process TensorLists +inline std::vector cached_cast( + at::ScalarType to_type, + const TensorList& arg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + std::vector vec; + vec.reserve(arg.size()); + for (const auto& t : arg) { + vec.emplace_back(cached_cast(to_type, t, device_type)); + } + return vec; +} + +inline std::vector cached_cast( + at::ScalarType to_type, + const ITensorListRef& arg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + std::vector vec; + vec.reserve(arg.size()); + for (const auto& t : arg) { + vec.emplace_back(cached_cast(to_type, t, device_type)); + } + return vec; +} + +// Template to catch non-Tensor args. +template +inline T cached_cast( + at::ScalarType to_type, + T arg, + c10::DeviceType device_type = c10::DeviceType::CUDA) { + return arg; +} + +/******************************************************* +Logic to flip an output dtype flag. +Keep it simple for now by assuming only one such flag is +present in the argument list. If I ever need a function +with more than flag I'll figure out something else. +The policy is: +If the user has explicity specified a dtype, respect it. +Otherwise, set it to the autocast type. +********************************************************/ + +// Overload to catch dtype flags +std::optional inline set_opt_dtype( + at::ScalarType to_type, + const std::optional& dtype) { + return dtype.has_value() ? dtype : to_type; +} + +// Template to catch other args +template +inline T set_opt_dtype(at::ScalarType to_type, T arg) { + return arg; +} + +template +inline bool firstarg_is_eligible( + c10::DeviceType device_type, + const Tensor& arg, + Args... args) { + return is_eligible(arg, device_type); +} + +template +inline at::ScalarType type_from_firstarg( + c10::DeviceType device_type, + at::ScalarType to_type, + const Tensor& arg, + Args... args) { + return (is_eligible(arg, device_type) ? to_type : arg.scalar_type()); +} + +// Policies correspond to op categories that need code-divergent handling. +// Wrapper templates below are specialized based on a policy template parameter. +enum class CastPolicy : uint8_t { + lower_precision_fp = 0, // Cast all inputs to lower_precision_fp before + // running the op. Currently, lower_precision_fp is + // fp16 for AutocastCUDA, and is defined by user + // (default bf16) for AutocastCPU or other device. + fp32, // Cast all inputs to at::kFloat before running the op. + fp32_set_opt_dtype, // Treats functions (like softmax) that + // 1. we'd like to run in fp32 and + // 2. have a std::optional arg that controls + // the output type. + // fp32_set_opt_dtype wrappers' policy is: if the output + // type is already set, don't touch it, otherwise, set + // it to at::kFloat. + fp32_append_dtype, // Treats functions (like norm) that + // 1. 
we'd like to run in fp32 and + // 2. have some overloads that accept an output type and + // other overloads that don't. + // fp32_append_dtype wrappers wrap the overloads that don't + // have an output dtype. + // The wrapper policy is: append at::kFloat to the args, + // and redispatch to the type-aware overload. + promote, // Run in the widest dtype among several args. +}; + +/******************************************************************************************************** +Templates to provide wrapper functions + +I'm copying the pattern used in core/boxing/impl/WrapFunctionIntoFunctor.h to +extract args and return type. (see also +https://stackoverflow.com/questions/46533698/how-to-deduce-argument-list-from-function-pointer) + +This strategy uses an exterior "WrapFunction" that extracts arguments on behalf +of (in my case several specializations of) an interior "WrapFunction_". +Interior WrapFunction_ specializations are defined for each CastPolicy. +********************************************************************************************************/ + +// Base template for WrapFunction_, which is specialized to contain a "call" +// method each CastPolicy +template < + CastPolicy policy, + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class ArgList> +struct WrapFunction_ {}; + +// CastPolicy::lower_precision_fp General_DeviceType +template < + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class... Args> +struct WrapFunction_< + CastPolicy::lower_precision_fp, + device_type, + Redispatch, + F, + Ret, + guts::typelist::typelist> { + static Ret call(Args... args) { + c10::impl::ExcludeDispatchKeyGuard no_autocast( + get_autocast_dispatch_key_from_device_type(device_type)); + return (*F)(cached_cast( + get_lower_precision_fp_from_device_type(device_type), + args, + device_type)...); + } +}; + +// CastPolicy::fp32 General_DeviceType +template < + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class... Args> +struct WrapFunction_< + CastPolicy::fp32, + device_type, + Redispatch, + F, + Ret, + guts::typelist::typelist> { + static Ret call(Args... args) { + c10::impl::ExcludeDispatchKeyGuard no_autocast( + get_autocast_dispatch_key_from_device_type(device_type)); + return (*F)(cached_cast(at::kFloat, args, device_type)...); + } +}; + +// CastPolicy::fp32_set_opt_dtype General_DeviceType +template < + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class... Args> +struct WrapFunction_< + CastPolicy::fp32_set_opt_dtype, + device_type, + Redispatch, + F, + Ret, + guts::typelist::typelist> { + static Ret call(Args... args) { + c10::impl::ExcludeDispatchKeyGuard no_autocast( + get_autocast_dispatch_key_from_device_type(device_type)); + if (firstarg_is_eligible(device_type, args...)) { + return (*F)(set_opt_dtype(at::kFloat, args)...); + } else { + // If ineligible, calls F with unaltered args. Does not set opt dtype, + // because setting opt dtype explicitly may interfere with internal + // implicit promotion decisions. + return (*F)(args...); + } + } +}; + +// CastPolicy::fp32_append_dtype General_DeviceType +template < + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class... Args> +struct WrapFunction_< + CastPolicy::fp32_append_dtype, + device_type, + Redispatch, + F, + Ret, + guts::typelist::typelist> { + static Ret call(Args... 
args) { + c10::impl::ExcludeDispatchKeyGuard no_autocast( + get_autocast_dispatch_key_from_device_type(device_type)); + at::ScalarType out_type = + type_from_firstarg(device_type, at::kFloat, args...); + return (*F)(args..., out_type); + } +}; + +// CastPolicy::promote General_DeviceType +template < + c10::DeviceType device_type, + class Redispatch, + Redispatch* F, + class Ret, + class... Args> +struct WrapFunction_< + CastPolicy::promote, + device_type, + Redispatch, + F, + Ret, + guts::typelist::typelist> { + static Ret call(Args... args) { + c10::impl::ExcludeDispatchKeyGuard no_autocast( + get_autocast_dispatch_key_from_device_type(device_type)); + auto to_type = promote_type( + get_lower_precision_fp_from_device_type(device_type), + device_type, + args...); + return (*F)(cached_cast(to_type, args, device_type)...); + } +}; + +// Wrapper to infer return_type and parameter_types for WrapFunction_ (imitating +// core/boxing/impl/WrapFunctionIntoFunctor.h) +template < + CastPolicy policy, + c10::DeviceType device_type, + class Registered, // The signature for which we're registering. The + // dispatcher's calling code invokes our registered + // functions with arguments matching Registered, so we + // register WrapFunction_::call methods with a matching + // signature to properly field those arguments. + // guts::function_traits below extracts return_type and + // parameter_types from Registered, which WrapFunction_ + // templates above use to declare their call methods. + class Redispatch, // The signature for the function we're redispatching to. + // In most cases this is the same as Registered, but for + // some ops (for example, ops where we append a dtype) + // it's useful to redispatch to a function with a + // different signature. + Redispatch* F> // The actual function we're redispatching to. +struct WrapFunction final { + using type = WrapFunction_< + policy, + device_type, + Redispatch, + F, + typename guts::function_traits::return_type, + typename guts::function_traits::parameter_types>; +}; + +/***************************************************************************************************************** +This section performs load-time registration for autocast wrappers. + +It's debatable at what level operations should be patched. We'd like casts to +be autograd-exposed and precede autograd history recording, so that for +lower_precision_fp ops, input tensors are saved for backward in +lower_precision_fp rather than fp32. Saving inputs in lower_precision_fp +can significantly reduce a model's memory footprint. + +Option 1 (strawman): Patch only at the level of explicit calls into +cudnn/cublas (cudnn_convolution, etc), because those are the code paths that are +guaranteed to use Tensor Cores, therefore they're the ones that will benefit +most from lower_precision_fp. Potential pitfall: convolutions (and other ops) +are wrapped in several layers of at::* calls. If one of those happens to record +autograd history, then we've lost the opportunity to save inputs in +lower_precision_fp. + +Option 2: Patch the Python-exposed surface of calls, to make 100% sure autograd +history recording can't sneak in ahead of autocast. This mirrors Apex most +closely. + +I think Option 2 is the right answer for all ops, not just convolutions. Option +2 is what I implement here. 
+*****************************************************************************************************************/ + +/******************************************************************************************************************** +Explicit registration for out-of-place ops + +The stuff below could be codegenned. Ed said +> you are going to have to write the function definition at some point, I +wouldn't try to get clever about it Therefore, for the moment, this is all +copy pasted in from VariableTypeEverything.cpp with appropriate substitutions. +********************************************************************************************************************/ + +} // namespace at::autocast + +#define ADD_NS(RAW_OP) at::RAW_OP + +#define _KERNEL_OVERLOAD_NARG_IMPL(_0, _1, _2, N, ...) N +#define _KERNEL_OVERLOAD_NARG(...) \ + C10_EXPAND_MSVC_WORKAROUND(_KERNEL_OVERLOAD_NARG_IMPL(__VA_ARGS__, 2, 1)) + +// Common cases where registration signature matches redispatch signature +// (that's why SIGNATURE is repeated in the WrapFunction instantiation) +#define KERNEL1(DISPATCHKEY, OP, POLICY) \ + m.impl( \ + TORCH_SELECTIVE_NAME("aten::" #OP), \ + &::at::autocast::WrapFunction< \ + ::at::autocast::CastPolicy::POLICY, \ + DISPATCHKEY, \ + decltype(ATEN_FN(OP)), \ + decltype(ATEN_FN(OP)), \ + &ATEN_FN(OP)>::type::call); + +#define KERNEL2(DISPATCHKEY, OP, OVERLOAD, POLICY) \ + m.impl( \ + TORCH_SELECTIVE_NAME("aten::" #OP "." #OVERLOAD), \ + &::at::autocast::WrapFunction< \ + ::at::autocast::CastPolicy::POLICY, \ + DISPATCHKEY, \ + decltype(ATEN_FN2(OP, OVERLOAD)), \ + decltype(ATEN_FN2(OP, OVERLOAD)), \ + &ATEN_FN2(OP, OVERLOAD)>::type::call); + +#define _KERNEL_DISPATCH(DISPATCHKEY, NARG, ...) \ + C10_CONCATENATE(KERNEL, NARG)(DISPATCHKEY, __VA_ARGS__) + +#define _KERNEL_IMPL(DISPATCHKEY, ...) \ + _KERNEL_DISPATCH(DISPATCHKEY, _KERNEL_OVERLOAD_NARG(__VA_ARGS__), __VA_ARGS__) + +// It will dispatch to KERNEL1 or KERNEL2 based on its inputs. +#define KERNEL(DISPATCHKEY, ...) _KERNEL_IMPL(DISPATCHKEY, __VA_ARGS__) + +// Less-common but still useful case: redispatching to a function +// with a new signature (e.g. appending a dtype) +#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE( \ + DISPATCHKEY, \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) \ + m.impl( \ + TORCH_SELECTIVE_NAME("aten::" REGISTER_NAME), \ + &::at::autocast::WrapFunction< \ + ::at::autocast::CastPolicy::POLICY, \ + DISPATCHKEY, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + &REDISPATCH_FUNC>::type::call); + +// KERNEL_CPU/KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_CPU +// registration (OP, POLICY) or (OP, OVERLOAD, POLICY) for AutocastCPU +#define KERNEL_CPU(...) KERNEL(c10::DeviceType::CPU, __VA_ARGS__) + +#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_CPU( \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) \ + KERNEL_DIFFERENT_REDISPATCH_SIGNATURE( \ + c10::DeviceType::CPU, \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) + +// KERNEL_CUDA/KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_CUDA +// registration (OP, POLICY) or (OP, OVERLOAD, POLICY) for AutocastCUDA +#define KERNEL_CUDA(...) 
KERNEL(c10::DeviceType::CUDA, __VA_ARGS__) + +#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_CUDA( \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) \ + KERNEL_DIFFERENT_REDISPATCH_SIGNATURE( \ + c10::DeviceType::CUDA, \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) + +// KERNEL_XPU/KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_XPU +// registration (OP, POLICY) or (OP, OVERLOAD, POLICY) for AutocastXPU +#define KERNEL_XPU(...) KERNEL(c10::DeviceType::XPU, __VA_ARGS__) + +#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_XPU( \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) \ + KERNEL_DIFFERENT_REDISPATCH_SIGNATURE( \ + c10::DeviceType::XPU, \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) + +// KERNEL_PRIVATEUSEONE/KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_PRIVATEUSEONE +// registration (OP, POLICY) or (OP, OVERLOAD, POLICY) for AutocastPrivateUse1 +#define KERNEL_PRIVATEUSEONE(...) \ + KERNEL(c10::DeviceType::PrivateUse1, __VA_ARGS__) + +#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_PRIVATEUSEONE( \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) \ + KERNEL_DIFFERENT_REDISPATCH_SIGNATURE( \ + c10::DeviceType::PrivateUse1, \ + REDISPATCH_FUNC, \ + REGISTER_NAME, \ + REGISTER_SIGNATURE, \ + REDISPATCH_SIGNATURE, \ + POLICY) + +// KERNEL_MPS registration for AutocastMPS +#define KERNEL_MPS(OP, POLICY) \ + m.impl( \ + TORCH_SELECTIVE_NAME("aten::" #OP), \ + &WrapFunction< \ + CastPolicy::POLICY, \ + DeviceType::MPS, \ + decltype(ATEN_FN(OP)), \ + decltype(ATEN_FN(OP)), \ + &ATEN_FN(OP)>::type::call); + +#define KERNEL_MPS2(OP, OVERLOAD, POLICY) \ + m.impl( \ + TORCH_SELECTIVE_NAME("aten::" #OP "." #OVERLOAD), \ + &WrapFunction< \ + CastPolicy::POLICY, \ + DeviceType::MPS, \ + decltype(ATEN_FN2(OP, OVERLOAD)), \ + decltype(ATEN_FN2(OP, OVERLOAD)), \ + &ATEN_FN2(OP, OVERLOAD)>::type::call); + +// Op lists for different policies. +// To make sure other backends can reuse the policy op list. 
+#define AT_FORALL_LOWER_PRECISION_FP(_) \ + _(_convolution, deprecated) \ + _(_convolution) \ + _(conv1d) \ + _(conv2d) \ + _(conv3d) \ + _(conv_tbc) \ + _(conv_transpose1d) \ + _(conv_transpose2d, input) \ + _(conv_transpose3d, input) \ + _(convolution) \ + _(prelu) \ + _(addmm) \ + _(addmv) \ + _(addr) \ + _(matmul) \ + _(einsum) \ + _(mm) \ + _(mv) \ + _(linalg_vecdot) \ + _(linear) \ + _(addbmm) \ + _(baddbmm) \ + _(bmm) \ + _(chain_matmul) \ + _(linalg_multi_dot) \ + _(_thnn_fused_lstm_cell) \ + _(_thnn_fused_gru_cell) \ + _(lstm_cell) \ + _(gru_cell) \ + _(rnn_tanh_cell) \ + _(rnn_relu_cell) \ + _(_scaled_dot_product_flash_attention) \ + _(scaled_dot_product_attention) + +#define AT_FORALL_FP32(_) \ + _(acos) \ + _(asin) \ + _(cosh) \ + _(erfinv) \ + _(exp) \ + _(expm1) \ + _(log) \ + _(log10) \ + _(log2) \ + _(log1p) \ + _(reciprocal) \ + _(rsqrt) \ + _(sinh) \ + _(tan) \ + _(pow, Tensor_Scalar) \ + _(pow, Tensor_Tensor) \ + _(pow, Scalar) \ + _(softplus) \ + _(layer_norm) \ + _(native_layer_norm) \ + _(group_norm) \ + _(frobenius_norm, dim) \ + _(nuclear_norm) \ + _(nuclear_norm, dim) \ + _(cosine_similarity) \ + _(poisson_nll_loss) \ + _(cosine_embedding_loss) \ + _(nll_loss) \ + _(nll_loss2d) \ + _(hinge_embedding_loss) \ + _(kl_div) \ + _(l1_loss) \ + _(smooth_l1_loss) \ + _(huber_loss) \ + _(mse_loss) \ + _(margin_ranking_loss) \ + _(multilabel_margin_loss) \ + _(soft_margin_loss) \ + _(triplet_margin_loss) \ + _(multi_margin_loss) \ + _(binary_cross_entropy_with_logits) \ + _(dist) \ + _(pdist) \ + _(cdist) \ + _(renorm) \ + _(logsumexp) \ + _(upsample_nearest1d) \ + _(_upsample_nearest_exact1d) \ + _(upsample_nearest2d) \ + _(_upsample_nearest_exact2d) \ + _(upsample_nearest3d) \ + _(_upsample_nearest_exact3d) \ + _(upsample_linear1d) \ + _(upsample_bilinear2d) \ + _(_upsample_bilinear2d_aa) \ + _(upsample_trilinear3d) \ + _(upsample_bicubic2d) \ + _(_upsample_bicubic2d_aa) + +#define AT_FORALL_FP32_SET_OPT_DTYPE(_) \ + _(prod) \ + _(prod, dim_int) \ + _(prod, dim_Dimname) \ + _(softmax, int) \ + _(softmax, Dimname) \ + _(log_softmax, int) \ + _(log_softmax, Dimname) \ + _(cumprod) \ + _(cumprod, dimname) \ + _(cumsum) \ + _(cumsum, dimname) \ + _(linalg_vector_norm) \ + _(linalg_matrix_norm) \ + _(linalg_matrix_norm, str_ord) \ + _(sum) \ + _(sum, dim_IntList) \ + _(sum, dim_DimnameList) + +#define AT_FORALL_DIFFERENT_REDISPATCH_SIGNATURE(_) \ + _(ADD_NS(norm), \ + "norm.Scalar", \ + Tensor(const Tensor&, const Scalar&), \ + Tensor(const Tensor&, const std::optional&, ScalarType), \ + fp32_append_dtype) \ + _(ADD_NS(norm), \ + "norm.ScalarOpt_dim", \ + Tensor(const Tensor&, const std::optional&, IntArrayRef, bool), \ + Tensor( \ + const Tensor&, \ + const std::optional&, \ + IntArrayRef, \ + bool, \ + ScalarType), \ + fp32_append_dtype) \ + _(ADD_NS(norm), \ + "norm.names_ScalarOpt_dim", \ + Tensor(const Tensor&, const std::optional&, DimnameList, bool), \ + Tensor( \ + const Tensor&, \ + const std::optional&, \ + DimnameList, \ + bool, \ + ScalarType), \ + fp32_append_dtype) + +#define AT_FORALL_PROMOTE(_) \ + _(addcdiv) \ + _(addcmul) \ + _(atan2) \ + _(bilinear) \ + _(cross) \ + _(dot) \ + _(vdot) \ + _(grid_sampler) \ + _(index_put) \ + _(tensordot) \ + _(scatter_add) diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/ceil_div.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/ceil_div.h new file mode 100644 index 0000000000000000000000000000000000000000..37d67b232a22c11fa7dccf638b7897c0854ab8bd --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/torch/include/ATen/ceil_div.h @@ -0,0 +1,24 @@ +#pragma once +#include +#include + +namespace at { + +/** + Computes ceil(a / b) +*/ +template >> +C10_ALWAYS_INLINE C10_HOST_DEVICE T ceil_div(T a, T b) { + return (a + b - 1) / b; +} + +/** + Computes ceil(a / b) * b; i.e., rounds up `a` to the next highest + multiple of b +*/ +template +C10_ALWAYS_INLINE C10_HOST_DEVICE T round_up(T a, T b) { + return ceil_div(a, b) * b; +} + +} // namespace at diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h new file mode 100644 index 0000000000000000000000000000000000000000..b9dfc618e54aa2340144563d5fde34b8d0e193fb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h @@ -0,0 +1,242 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace at::jit { + +// A template environment is a mapping from template variable names, e.g., +// identifier (corresponding to $identifier) to their expansions. +// +// This template environment supports storing strings, numbers and lists +// of strings, and can be chained together (so that lookup proceeds in +// in the top level environment, and then recurses into a parent +// environment if the key is not found.) +struct TemplateEnv { + TemplateEnv() = default; + TemplateEnv(TemplateEnv& parent) : parent(&parent) {} + TemplateEnv& operator=(const TemplateEnv& parent) = delete; + + using string_list = std::vector; + + // Add a string 'v' to the map at key 'k'. + void s(const std::string& k, const std::string& v) { + strings_[k] = v; + lists_.erase(k); + } + + // Add a number 'v' to the map at key 'k' + template + void d(const std::string& k, const T& v) { + strings_[k] = std::to_string(v); + lists_.erase(k); + } + + // Retrieve the string representation of the value stored at 'k' from the map. + // Raises an exception if the key is not found. + const std::string& s(const std::string& k) const { + if (strings_.count(k) == 0) { + if (parent) { + return parent->s(k); + } + notFound(k); + } + return strings_.at(k); + } + + // Store a list of strings 'v' in the map at 'k'. + void v(const std::string& k, const string_list& v) { + lists_[k] = v; + strings_.erase(k); + } + + // Retrieve a list of strings stored at 'k' from the map. + // Raises an exception if the key is not found. + const string_list& v(const std::string& k) const { + if (lists_.count(k) == 0) { + if (parent) { + return parent->v(k); + } + notFound(k); + } + return lists_.at(k); + } + + // Test if a string 'k' is a string (as opposed to a list.) + bool keyIsString(const std::string& k) const { + if (strings_.count(k) > 0) + return true; + if (lists_.count(k) > 0) + return false; + if (parent) + return parent->keyIsString(k); + notFound(k); + } + + private: + [[noreturn]] void notFound(const std::string& k) const { + std::stringstream ss; + ss << "key not found: " << k; + throw std::logic_error(ss.str()); + } + + std::unordered_map strings_; + std::unordered_map lists_; + TemplateEnv* parent{nullptr}; +}; + +/* +# Match $identifier or ${identifier} and replace with the value in env. +# If this identifier is at the beginning of whitespace on a line +# and its value is a list then it is treated as +# block substitution by indenting all lines of all elements. +# If the identifier is on a line starting with non-whitespace and a list +# then it is comma separated. 
${,foo} will insert a comma before the list +# if this list is not empty and ${foo,} will insert one after. +*/ +struct CodeTemplate { + /* implicit */ CodeTemplate(std::string t) : template_text(std::move(t)) {} + + std::string format(const TemplateEnv& env) const { + std::stringstream out; + size_t pos = 0; + size_t indent = 0; + bool all_whitespace = true; + while (pos < template_text.size()) { + char c = template_text[pos]; + if (c == '$') { + std::stringstream kss; + bool comma_before = false; + bool comma_after = false; + size_t new_pos = parseKey(pos, kss, comma_before, comma_after); + std::string k = kss.str(); + bool is_string = env.keyIsString(k); + if (all_whitespace) { + if (is_string) + emitStringWithIndents(out, indent, env.s(k)); + else + emitLinesIndented(out, indent, env.v(k)); + } else { + if (is_string) + out << env.s(k); + else + emitCommaSeparatedList(out, env.v(k), comma_before, comma_after); + } + all_whitespace = false; + pos = new_pos; + } else { + out << c; + if (!isspace(c)) + all_whitespace = false; + indent++; + if (c == '\n') { + indent = 0; + all_whitespace = true; + } + pos++; + } + } + return out.str(); + } + + private: + using string_list = std::vector; + char charAt(size_t p) const { + if (p >= template_text.size()) + throw std::logic_error("EOS found in key"); + return template_text[p]; + } + size_t parseKey( + size_t pos, + std::ostream& k, + bool& comma_before, + bool& comma_after) const { + comma_before = false; + comma_after = false; + pos++; + if (charAt(pos) == '{') { + pos++; + if (charAt(pos) == ',') { + comma_before = true; + pos++; + } + pos = parseIdent(pos, k); + if (charAt(pos) == ',') { + comma_after = true; + pos++; + } + if (charAt(pos) != '}') + throw std::logic_error("missing terminating '}'"); + pos++; + return pos; + } else { + return parseIdent(pos, k); + } + } + size_t parseIdent(size_t pos, std::ostream& k) const { + while (pos < template_text.size() && + (isalnum(template_text[pos]) || template_text[pos] == '_')) { + k << template_text[pos]; + pos++; + } + return pos; + } + void emitCommaSeparatedList( + std::ostream& out, + const string_list& strings, + bool comma_before, + bool comma_after) const { + if (comma_before && !strings.empty()) + out << ", "; + for (const auto i : c10::irange(strings.size())) { + if (i > 0) + out << ", "; + out << strings[i]; + } + if (comma_after && !strings.empty()) + out << ", "; + } + // These indentation functions follow the convention that they never emit + // leading or trailing newlines when the input string does not have leading + // or trailing newlines. It's the responsibility of the calling function + // to indent correctly in the context. 
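  // A minimal sketch (illustrative only; the keys "name" and "args" are
  // hypothetical) of the substitution rules described in the comment above:
  //
  //   at::jit::TemplateEnv env;
  //   env.s("name", "my_kernel");
  //   env.v("args", {"const float* x", "float* y"});
  //   // ${args} follows non-whitespace on its line, so the list is
  //   // emitted comma-separated rather than as an indented block:
  //   std::string src =
  //       at::jit::CodeTemplate("void ${name}(${args});").format(env);
  //   // src == "void my_kernel(const float* x, float* y);"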
+ void emitIndent(std::ostream& out, size_t indent) const { + for (C10_UNUSED const auto i : c10::irange(indent)) { + out << " "; + } + } + void emitStringWithIndents( + std::ostream& out, + size_t indent, + const std::string& str) const { + for (auto c : str) { + out << c; + if (c == '\n') { + emitIndent(out, indent); + } + } + } + void emitLinesIndented( + std::stringstream& out, + size_t indent, + const string_list& strings) const { + for (const auto i : c10::irange(strings.size())) { + if (i > 0) + emitIndent(out, indent); + emitStringWithIndents(out, indent, strings[i]); + if (i + 1 != strings.size()) + out << "\n"; + } + } + std::string template_text; +}; + +static inline std::string format(const std::string& fmt, TemplateEnv& env) { + return CodeTemplate(fmt).format(env); +} + +} // namespace at::jit diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/div_rtn.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/div_rtn.h new file mode 100644 index 0000000000000000000000000000000000000000..4935f49ae2726389441e4012cc15bcf3981f2e84 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/div_rtn.h @@ -0,0 +1,11 @@ +#pragma once + +// Integer division rounding to -Infinity +template +static inline T div_rtn(T x, T y) { + int q = x / y; + int r = x % y; + if ((r != 0) && ((r < 0) != (y < 0))) + --q; + return q; +} diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/jit_macros.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/jit_macros.h new file mode 100644 index 0000000000000000000000000000000000000000..9af826549021a0853beb83c74b6ac695728ab054 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/jit_macros.h @@ -0,0 +1,7 @@ +#pragma once +#include +#include + +// AT_USE_JITERATOR(), controls whether we jit some elementwise kernels +#define AT_USE_JITERATOR() true +#define jiterator_stringify(...) std::string(#__VA_ARGS__); diff --git a/.venv/lib/python3.11/site-packages/torch/include/ATen/jiterator_macros.h b/.venv/lib/python3.11/site-packages/torch/include/ATen/jiterator_macros.h new file mode 100644 index 0000000000000000000000000000000000000000..3aa4c7ebb0af07fd65012d9d531aaa140dd6c212 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/ATen/jiterator_macros.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include + +#define JITERATOR_HOST_DEVICE C10_HOST_DEVICE +#if defined(_MSC_VER) && defined(__CUDACC__) +// NVRTC on Windows errors if __host__ __device__ attribute is +// present on kernel. +// error: attribute "__host__" does not apply here +// error: attribute "__device__" does not apply here +#define JITERATOR_HOST_DEVICE +#endif + +// jiterator_also_stringify_as macro is used to define code (for CPU/ROCm) +// and generate code string for `jiterator` (only when compiling for CUDA). +// Usage : +// jiterator_also_stringify_as( +// jiterator_code(template T identity(T x) { return x; }), +// identity_string); +// This will define the template `identity` as present in code and +// also define `std::string identity_string` with the code as the string +// if this is being compiled for CUDA. + +// `jiterator_code` macro is to deal with `,` in the kernel code. +// These `,`s confuse the preprocessor into thinking we are passing +// multiple arguments to the macro. +#define jiterator_code(...) __VA_ARGS__ +#if defined(__CUDACC__) || defined(__HIPCC__) +// CPU and CUDA and ROCm case +#define stringify_code(...) 
#__VA_ARGS__ +#define jiterator_also_stringify_as(code, str_name) \ + code /* define the function */ \ + const std::string str_name = std::string(stringify_code(code)); +#else +// CPU only or CPU and ROCm case +// Only needs the function +#define jiterator_also_stringify_as(code, str_name) code +#endif diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/crc_alt.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/crc_alt.h new file mode 100644 index 0000000000000000000000000000000000000000..9d1c4f1dc7ddc8997f7cc1297ef20d74de67afe0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/crc_alt.h @@ -0,0 +1,1343 @@ +#pragma once + +// ////////////////////////////////////////////////////////// +// Crc32.h +// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved. +// Slicing-by-16 contributed by Bulat Ziganshin +// Tableless bytewise CRC contributed by Hagai Gold +// see http://create.stephan-brumme.com/disclaimer.html +// + +// if running on an embedded system, you might consider shrinking the +// big Crc32Lookup table by undefining these lines: +#define CRC32_USE_LOOKUP_TABLE_BYTE +#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_4 +#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_8 +#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 +// - crc32_bitwise doesn't need it at all +// - crc32_halfbyte has its own small lookup table +// - crc32_1byte_tableless and crc32_1byte_tableless2 don't need it at all +// - crc32_1byte needs only Crc32Lookup[0] +// - crc32_4bytes needs only Crc32Lookup[0..3] +// - crc32_8bytes needs only Crc32Lookup[0..7] +// - crc32_4x8bytes needs only Crc32Lookup[0..7] +// - crc32_16bytes needs all of Crc32Lookup +// using the aforementioned #defines the table is automatically fitted to your needs + +// uint8_t, uint32_t, int32_t +#include +// size_t +#include + +// crc32_fast selects the fastest algorithm depending on flags (CRC32_USE_LOOKUP_...) 
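// A minimal usage sketch (illustrative only; `buf` and `len` are hypothetical
// caller-provided values). The `previousCrc32` parameter lets callers chain
// calls over consecutive chunks and obtain the same result as one shot:
//
//   uint32_t crc  = crc32_fast(buf, len);                            // one shot
//   uint32_t head = crc32_fast(buf, 16);                             // first 16 bytes
//   uint32_t full = crc32_fast((const uint8_t*)buf + 16, len - 16, head);
//   // full == crc (assuming len >= 16)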
+/// compute CRC32 using the fastest algorithm for large datasets on modern CPUs +uint32_t crc32_fast (const void* data, size_t length, uint32_t previousCrc32 = 0); + +/// merge two CRC32 such that result = crc32(dataB, lengthB, crc32(dataA, lengthA)) +uint32_t crc32_combine (uint32_t crcA, uint32_t crcB, size_t lengthB); + +/// compute CRC32 (bitwise algorithm) +uint32_t crc32_bitwise (const void* data, size_t length, uint32_t previousCrc32 = 0); +/// compute CRC32 (half-byte algoritm) +uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32 = 0); + +#ifdef CRC32_USE_LOOKUP_TABLE_BYTE +/// compute CRC32 (standard algorithm) +uint32_t crc32_1byte (const void* data, size_t length, uint32_t previousCrc32 = 0); +#endif + +/// compute CRC32 (byte algorithm) without lookup tables +uint32_t crc32_1byte_tableless (const void* data, size_t length, uint32_t previousCrc32 = 0); +/// compute CRC32 (byte algorithm) without lookup tables +uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32 = 0); + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4 +/// compute CRC32 (Slicing-by-4 algorithm) +uint32_t crc32_4bytes (const void* data, size_t length, uint32_t previousCrc32 = 0); +#endif + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_8 +/// compute CRC32 (Slicing-by-8 algorithm) +uint32_t crc32_8bytes (const void* data, size_t length, uint32_t previousCrc32 = 0); +/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times +uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32 = 0); +#endif + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 +/// compute CRC32 (Slicing-by-16 algorithm) +uint32_t crc32_16bytes (const void* data, size_t length, uint32_t previousCrc32 = 0); +/// compute CRC32 (Slicing-by-16 algorithm, prefetch upcoming data blocks) +uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previousCrc32 = 0, size_t prefetchAhead = 256); +#endif + +// ////////////////////////////////////////////////////////// +// Crc32.cpp +// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved. 
+// Slicing-by-16 contributed by Bulat Ziganshin +// Tableless bytewise CRC contributed by Hagai Gold +// see http://create.stephan-brumme.com/disclaimer.html +// + +// if running on an embedded system, you might consider shrinking the +// big Crc32Lookup table: +// - crc32_bitwise doesn't need it at all +// - crc32_halfbyte has its own small lookup table +// - crc32_1byte needs only Crc32Lookup[0] +// - crc32_4bytes needs only Crc32Lookup[0..3] +// - crc32_8bytes needs only Crc32Lookup[0..7] +// - crc32_4x8bytes needs only Crc32Lookup[0..7] +// - crc32_16bytes needs all of Crc32Lookup + + +#ifndef __LITTLE_ENDIAN + #define __LITTLE_ENDIAN 1234 +#endif +#ifndef __BIG_ENDIAN + #define __BIG_ENDIAN 4321 +#endif + +// define endianess and some integer data types +#if defined(_MSC_VER) || defined(__MINGW32__) + // Windows always little endian + #define __BYTE_ORDER __LITTLE_ENDIAN + + // intrinsics / prefetching + #if defined(_M_ARM64) + #include + #else + #include + #endif + + #ifdef __MINGW32__ + #define PREFETCH(location) __builtin_prefetch(location) + #else + #if defined(_M_ARM64) + #define PREFETCH(location) __prefetch(location) + #else + #define PREFETCH(location) _mm_prefetch(location, _MM_HINT_T0) + #endif + #endif +#elif defined(__APPLE__) + #include + #if TARGET_IPHONE_SIMULATOR + #define __BYTE_ORDER __LITTLE_ENDIAN + #elif TARGET_OS_IPHONE + #define __BYTE_ORDER __LITTLE_ENDIAN + #elif TARGET_OS_MAC + #include + #if defined(__BIG_ENDIAN__) + #define __BYTE_ORDER __BIG_ENDIAN + #endif + #if defined(__LITTLE_ENDIAN__) + #define __BYTE_ORDER __LITTLE_ENDIAN + #endif + #else + # error "Unknown Apple platform" + #endif +#elif defined(__ARMEB__) + #define __BYTE_ORDER __BIG_ENDIAN +#elif (defined(__BYTE_ORDER__) and !defined(__BYTE_ORDER)) + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + #define __BYTE_ORDER __BIG_ENDIAN + #else + #define __BYTE_ORDER __LITTLE_ENDIAN + #endif +#else + // defines __BYTE_ORDER as __LITTLE_ENDIAN or __BIG_ENDIAN + #include +#endif + +// intrinsics / prefetching +#ifdef __GNUC__ + #define PREFETCH(location) __builtin_prefetch(location) +#else +#ifndef PREFETCH + // no prefetching + #define PREFETCH(location) ; +#endif +#endif + +// abort if byte order is undefined +#ifndef __BYTE_ORDER +#error undefined byte order, compile with -D__BYTE_ORDER=1234 (if little endian) or -D__BYTE_ORDER=4321 (big endian) +#endif + + +namespace +{ + /// zlib's CRC32 polynomial + const uint32_t Polynomial = 0xEDB88320; + + /// swap endianess + static inline uint32_t swap(uint32_t x) + { + #if defined(__GNUC__) || defined(__clang__) + return __builtin_bswap32(x); + #else + return (x >> 24) | + ((x >> 8) & 0x0000FF00) | + ((x << 8) & 0x00FF0000) | + (x << 24); + #endif + } + + /// Slicing-By-16 + #ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 + const size_t MaxSlice = 16; + #elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8) + const size_t MaxSlice = 8; + #elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4) + const size_t MaxSlice = 4; + #elif defined(CRC32_USE_LOOKUP_TABLE_BYTE) + const size_t MaxSlice = 1; + #else + #define NO_LUT // don't need Crc32Lookup at all + #endif + +} // anonymous namespace + +#ifndef NO_LUT +/// forward declaration, table is at the end of this file +extern const uint32_t Crc32Lookup[MaxSlice][256]; // extern is needed to keep compiler happy +#endif + + +/// compute CRC32 (bitwise algorithm) +uint32_t crc32_bitwise(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint8_t* 
current = (const uint8_t*) data; + + while (length-- != 0) + { + crc ^= *current++; + + for (int j = 0; j < 8; j++) + { + // branch-free + crc = (crc >> 1) ^ (-int32_t(crc & 1) & Polynomial); + + // branching, much slower: + //if (crc & 1) + // crc = (crc >> 1) ^ Polynomial; + //else + // crc = crc >> 1; + } + } + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +/// compute CRC32 (half-byte algoritm) +uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint8_t* current = (const uint8_t*) data; + + /// look-up table for half-byte, same as crc32Lookup[0][16*i] + static const uint32_t Crc32Lookup16[16] = + { + 0x00000000,0x1DB71064,0x3B6E20C8,0x26D930AC,0x76DC4190,0x6B6B51F4,0x4DB26158,0x5005713C, + 0xEDB88320,0xF00F9344,0xD6D6A3E8,0xCB61B38C,0x9B64C2B0,0x86D3D2D4,0xA00AE278,0xBDBDF21C + }; + + while (length-- != 0) + { + crc = Crc32Lookup16[(crc ^ *current ) & 0x0F] ^ (crc >> 4); + crc = Crc32Lookup16[(crc ^ (*current >> 4)) & 0x0F] ^ (crc >> 4); + current++; + } + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +#ifdef CRC32_USE_LOOKUP_TABLE_BYTE +/// compute CRC32 (standard algorithm) +uint32_t crc32_1byte(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint8_t* current = (const uint8_t*) data; + + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *current++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} +#endif + + +/// compute CRC32 (byte algorithm) without lookup tables +uint32_t crc32_1byte_tableless(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint8_t* current = (const uint8_t*) data; + + while (length-- != 0) + { + uint8_t s = uint8_t(crc) ^ *current++; + + // Hagai Gold made me aware of this table-less algorithm and send me code + + // polynomial 0xEDB88320 can be written in binary as 11101101101110001000001100100000b + // reverse the bits (or just assume bit 0 is the first one) + // and we have bits set at position 0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26 + // => those are the shift offsets: + //crc = (crc >> 8) ^ + // t ^ + // (t >> 1) ^ (t >> 2) ^ (t >> 4) ^ (t >> 5) ^ // == y + // (t >> 7) ^ (t >> 8) ^ (t >> 10) ^ (t >> 11) ^ // == y >> 6 + // (t >> 12) ^ (t >> 16) ^ // == z + // (t >> 22) ^ (t >> 26) ^ // == z >> 10 + // (t >> 23); + + // the fastest I can come up with: + uint32_t low = (s ^ (s << 6)) & 0xFF; + uint32_t a = (low * ((1 << 23) + (1 << 14) + (1 << 2))); + crc = (crc >> 8) ^ + (low * ((1 << 24) + (1 << 16) + (1 << 8))) ^ + a ^ + (a >> 1) ^ + (low * ((1 << 20) + (1 << 12) )) ^ + (low << 19) ^ + (low << 17) ^ + (low >> 2); + + // Hagai's code: + /*uint32_t t = (s ^ (s << 6)) << 24; + // some temporaries to optimize XOR + uint32_t x = (t >> 1) ^ (t >> 2); + uint32_t y = x ^ (x >> 3); + uint32_t z = (t >> 12) ^ (t >> 16); + crc = (crc >> 8) ^ + t ^ (t >> 23) ^ + y ^ (y >> 6) ^ + z ^ (z >> 10);*/ + } + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +/// compute CRC32 (byte algorithm) without lookup tables +uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32) +{ + int32_t crc = ~previousCrc32; // note: signed integer, right shift distributes sign bit into lower bits + const uint8_t* current = (const uint8_t*) data; + + while (length-- != 0) + { + crc = crc ^ *current++; + + uint32_t c = (((crc << 31) >> 
31) & ((Polynomial >> 7) ^ (Polynomial >> 1))) ^ + (((crc << 30) >> 31) & ((Polynomial >> 6) ^ Polynomial)) ^ + (((crc << 29) >> 31) & (Polynomial >> 5)) ^ + (((crc << 28) >> 31) & (Polynomial >> 4)) ^ + (((crc << 27) >> 31) & (Polynomial >> 3)) ^ + (((crc << 26) >> 31) & (Polynomial >> 2)) ^ + (((crc << 25) >> 31) & (Polynomial >> 1)) ^ + (((crc << 24) >> 31) & Polynomial); + + crc = ((uint32_t)crc >> 8) ^ c; // convert to unsigned integer before right shift + } + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4 +/// compute CRC32 (Slicing-by-4 algorithm) +uint32_t crc32_4bytes(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // process four bytes at once (Slicing-by-4) + while (length >= 4) + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t one = *current++ ^ swap(crc); + crc = Crc32Lookup[0][ one & 0xFF] ^ + Crc32Lookup[1][(one>> 8) & 0xFF] ^ + Crc32Lookup[2][(one>>16) & 0xFF] ^ + Crc32Lookup[3][(one>>24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + crc = Crc32Lookup[0][(one>>24) & 0xFF] ^ + Crc32Lookup[1][(one>>16) & 0xFF] ^ + Crc32Lookup[2][(one>> 8) & 0xFF] ^ + Crc32Lookup[3][ one & 0xFF]; +#endif + + length -= 4; + } + + const uint8_t* currentChar = (const uint8_t*) current; + // remaining 1 to 3 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} +#endif + + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_8 +/// compute CRC32 (Slicing-by-8 algorithm) +uint32_t crc32_8bytes(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // process eight bytes at once (Slicing-by-8) + while (length >= 8) + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t one = *current++ ^ swap(crc); + uint32_t two = *current++; + crc = Crc32Lookup[0][ two & 0xFF] ^ + Crc32Lookup[1][(two>> 8) & 0xFF] ^ + Crc32Lookup[2][(two>>16) & 0xFF] ^ + Crc32Lookup[3][(two>>24) & 0xFF] ^ + Crc32Lookup[4][ one & 0xFF] ^ + Crc32Lookup[5][(one>> 8) & 0xFF] ^ + Crc32Lookup[6][(one>>16) & 0xFF] ^ + Crc32Lookup[7][(one>>24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + crc = Crc32Lookup[0][(two>>24) & 0xFF] ^ + Crc32Lookup[1][(two>>16) & 0xFF] ^ + Crc32Lookup[2][(two>> 8) & 0xFF] ^ + Crc32Lookup[3][ two & 0xFF] ^ + Crc32Lookup[4][(one>>24) & 0xFF] ^ + Crc32Lookup[5][(one>>16) & 0xFF] ^ + Crc32Lookup[6][(one>> 8) & 0xFF] ^ + Crc32Lookup[7][ one & 0xFF]; +#endif + + length -= 8; + } + + const uint8_t* currentChar = (const uint8_t*) current; + // remaining 1 to 7 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times +uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // enabling optimization (at least -O2) automatically unrolls the inner for-loop + const size_t Unroll = 4; + const size_t BytesAtOnce = 8 * Unroll; + + // process 4x eight bytes at once (Slicing-by-8) + while (length >= BytesAtOnce) + { + for (size_t unrolling = 0; 
unrolling < Unroll; unrolling++) + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t one = *current++ ^ swap(crc); + uint32_t two = *current++; + crc = Crc32Lookup[0][ two & 0xFF] ^ + Crc32Lookup[1][(two>> 8) & 0xFF] ^ + Crc32Lookup[2][(two>>16) & 0xFF] ^ + Crc32Lookup[3][(two>>24) & 0xFF] ^ + Crc32Lookup[4][ one & 0xFF] ^ + Crc32Lookup[5][(one>> 8) & 0xFF] ^ + Crc32Lookup[6][(one>>16) & 0xFF] ^ + Crc32Lookup[7][(one>>24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + crc = Crc32Lookup[0][(two>>24) & 0xFF] ^ + Crc32Lookup[1][(two>>16) & 0xFF] ^ + Crc32Lookup[2][(two>> 8) & 0xFF] ^ + Crc32Lookup[3][ two & 0xFF] ^ + Crc32Lookup[4][(one>>24) & 0xFF] ^ + Crc32Lookup[5][(one>>16) & 0xFF] ^ + Crc32Lookup[6][(one>> 8) & 0xFF] ^ + Crc32Lookup[7][ one & 0xFF]; +#endif + + } + + length -= BytesAtOnce; + } + + const uint8_t* currentChar = (const uint8_t*) current; + // remaining 1 to 31 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} +#endif // CRC32_USE_LOOKUP_TABLE_SLICING_BY_8 + + +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 +/// compute CRC32 (Slicing-by-16 algorithm) +uint32_t crc32_16bytes(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // enabling optimization (at least -O2) automatically unrolls the inner for-loop + const size_t Unroll = 4; + const size_t BytesAtOnce = 16 * Unroll; + + while (length >= BytesAtOnce) + { + for (size_t unrolling = 0; unrolling < Unroll; unrolling++) + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t one = *current++ ^ swap(crc); + uint32_t two = *current++; + uint32_t three = *current++; + uint32_t four = *current++; + crc = Crc32Lookup[ 0][ four & 0xFF] ^ + Crc32Lookup[ 1][(four >> 8) & 0xFF] ^ + Crc32Lookup[ 2][(four >> 16) & 0xFF] ^ + Crc32Lookup[ 3][(four >> 24) & 0xFF] ^ + Crc32Lookup[ 4][ three & 0xFF] ^ + Crc32Lookup[ 5][(three >> 8) & 0xFF] ^ + Crc32Lookup[ 6][(three >> 16) & 0xFF] ^ + Crc32Lookup[ 7][(three >> 24) & 0xFF] ^ + Crc32Lookup[ 8][ two & 0xFF] ^ + Crc32Lookup[ 9][(two >> 8) & 0xFF] ^ + Crc32Lookup[10][(two >> 16) & 0xFF] ^ + Crc32Lookup[11][(two >> 24) & 0xFF] ^ + Crc32Lookup[12][ one & 0xFF] ^ + Crc32Lookup[13][(one >> 8) & 0xFF] ^ + Crc32Lookup[14][(one >> 16) & 0xFF] ^ + Crc32Lookup[15][(one >> 24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + uint32_t three = *current++; + uint32_t four = *current++; + crc = Crc32Lookup[ 0][(four >> 24) & 0xFF] ^ + Crc32Lookup[ 1][(four >> 16) & 0xFF] ^ + Crc32Lookup[ 2][(four >> 8) & 0xFF] ^ + Crc32Lookup[ 3][ four & 0xFF] ^ + Crc32Lookup[ 4][(three >> 24) & 0xFF] ^ + Crc32Lookup[ 5][(three >> 16) & 0xFF] ^ + Crc32Lookup[ 6][(three >> 8) & 0xFF] ^ + Crc32Lookup[ 7][ three & 0xFF] ^ + Crc32Lookup[ 8][(two >> 24) & 0xFF] ^ + Crc32Lookup[ 9][(two >> 16) & 0xFF] ^ + Crc32Lookup[10][(two >> 8) & 0xFF] ^ + Crc32Lookup[11][ two & 0xFF] ^ + Crc32Lookup[12][(one >> 24) & 0xFF] ^ + Crc32Lookup[13][(one >> 16) & 0xFF] ^ + Crc32Lookup[14][(one >> 8) & 0xFF] ^ + Crc32Lookup[15][ one & 0xFF]; +#endif + } + + length -= BytesAtOnce; + } + + const uint8_t* currentChar = (const uint8_t*) current; + // remaining 1 to 63 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} + + +/// compute 
CRC32 (Slicing-by-16 algorithm, prefetch upcoming data blocks) +uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previousCrc32, size_t prefetchAhead) +{ + // CRC code is identical to crc32_16bytes (including unrolling), only added prefetching + // 256 bytes look-ahead seems to be the sweet spot on Core i7 CPUs + + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // enabling optimization (at least -O2) automatically unrolls the for-loop + const size_t Unroll = 4; + const size_t BytesAtOnce = 16 * Unroll; + + while (length >= BytesAtOnce + prefetchAhead) + { + PREFETCH(((const char*) current) + prefetchAhead); + + for (size_t unrolling = 0; unrolling < Unroll; unrolling++) + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t one = *current++ ^ swap(crc); + uint32_t two = *current++; + uint32_t three = *current++; + uint32_t four = *current++; + crc = Crc32Lookup[ 0][ four & 0xFF] ^ + Crc32Lookup[ 1][(four >> 8) & 0xFF] ^ + Crc32Lookup[ 2][(four >> 16) & 0xFF] ^ + Crc32Lookup[ 3][(four >> 24) & 0xFF] ^ + Crc32Lookup[ 4][ three & 0xFF] ^ + Crc32Lookup[ 5][(three >> 8) & 0xFF] ^ + Crc32Lookup[ 6][(three >> 16) & 0xFF] ^ + Crc32Lookup[ 7][(three >> 24) & 0xFF] ^ + Crc32Lookup[ 8][ two & 0xFF] ^ + Crc32Lookup[ 9][(two >> 8) & 0xFF] ^ + Crc32Lookup[10][(two >> 16) & 0xFF] ^ + Crc32Lookup[11][(two >> 24) & 0xFF] ^ + Crc32Lookup[12][ one & 0xFF] ^ + Crc32Lookup[13][(one >> 8) & 0xFF] ^ + Crc32Lookup[14][(one >> 16) & 0xFF] ^ + Crc32Lookup[15][(one >> 24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + uint32_t three = *current++; + uint32_t four = *current++; + crc = Crc32Lookup[ 0][(four >> 24) & 0xFF] ^ + Crc32Lookup[ 1][(four >> 16) & 0xFF] ^ + Crc32Lookup[ 2][(four >> 8) & 0xFF] ^ + Crc32Lookup[ 3][ four & 0xFF] ^ + Crc32Lookup[ 4][(three >> 24) & 0xFF] ^ + Crc32Lookup[ 5][(three >> 16) & 0xFF] ^ + Crc32Lookup[ 6][(three >> 8) & 0xFF] ^ + Crc32Lookup[ 7][ three & 0xFF] ^ + Crc32Lookup[ 8][(two >> 24) & 0xFF] ^ + Crc32Lookup[ 9][(two >> 16) & 0xFF] ^ + Crc32Lookup[10][(two >> 8) & 0xFF] ^ + Crc32Lookup[11][ two & 0xFF] ^ + Crc32Lookup[12][(one >> 24) & 0xFF] ^ + Crc32Lookup[13][(one >> 16) & 0xFF] ^ + Crc32Lookup[14][(one >> 8) & 0xFF] ^ + Crc32Lookup[15][ one & 0xFF]; +#endif + } + + length -= BytesAtOnce; + } + + const uint8_t* currentChar = (const uint8_t*) current; + // remaining 1 to 63 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} +#endif + + +/// compute CRC32 using the fastest algorithm for large datasets on modern CPUs +uint32_t crc32_fast(const void* data, size_t length, uint32_t previousCrc32) +{ +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 + return crc32_16bytes (data, length, previousCrc32); +#elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8) + return crc32_8bytes (data, length, previousCrc32); +#elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4) + return crc32_4bytes (data, length, previousCrc32); +#elif defined(CRC32_USE_LOOKUP_TABLE_BYTE) + return crc32_1byte (data, length, previousCrc32); +#else + return crc32_halfbyte(data, length, previousCrc32); +#endif +} + + +/// merge two CRC32 such that result = crc32(dataB, lengthB, crc32(dataA, lengthA)) +uint32_t crc32_combine(uint32_t crcA, uint32_t crcB, size_t lengthB) +{ + // based on Mark Adler's crc_combine from + // https://github.com/madler/pigz/blob/master/pigz.c + + // main idea: 
+ // - if you have two equally-sized blocks A and B, + // then you can create a block C = A ^ B + // which has the property crc(C) = crc(A) ^ crc(B) + // - if you append length(B) zeros to A and call it A' (think of it as AAAA000) + // and prepend length(A) zeros to B and call it B' (think of it as 0000BBB) + // then there exists a C' = A' ^ B' + // - remember: if you XOR something with zero, it remains unchanged: X ^ 0 = X + // - that means C' = A concat B so that crc(A concat B) = crc(C') = crc(A') ^ crc(B') + // - the trick is to compute crc(A') based on crc(A) + // and crc(B') based on crc(B) + // - since B' starts with many zeros, the crc of those initial zeros is still zero + // - that means crc(B') = crc(B) + // - unfortunately the trailing zeros of A' change the crc, so usually crc(A') != crc(A) + // - the following code is a fast algorithm to compute crc(A') + // - starting with crc(A) and appending length(B) zeros, needing just log2(length(B)) iterations + // - the details are explained by the original author at + // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768 + // + // notes: + // - I squeezed everything into one function to keep the global namespace clean (the original code used two helper functions) + // - most original comments are still in place, I added comments where these helper functions were made inline code + // - performance-wise there isn't any difference compared to the original zlib/pigz code + + // degenerate case + if (lengthB == 0) + return crcA; + + /// CRC32 => 32 bits + const uint32_t CrcBits = 32; + + uint32_t odd [CrcBits]; // odd-power-of-two zeros operator + uint32_t even[CrcBits]; // even-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = Polynomial; // CRC-32 polynomial + for (uint32_t i = 1; i < CrcBits; i++) + odd[i] = 1 << (i - 1); + + // put operator for two zero bits in even + // same as gf2_matrix_square(even, odd); + for (uint32_t i = 0; i < CrcBits; i++) + { + uint32_t vec = odd[i]; + even[i] = 0; + for (int j = 0; vec != 0; j++, vec >>= 1) + if (vec & 1) + even[i] ^= odd[j]; + } + // put operator for four zero bits in odd + // same as gf2_matrix_square(odd, even); + for (uint32_t i = 0; i < CrcBits; i++) + { + uint32_t vec = even[i]; + odd[i] = 0; + for (int j = 0; vec != 0; j++, vec >>= 1) + if (vec & 1) + odd[i] ^= even[j]; + } + + // the following loop becomes much shorter if I keep swapping even and odd + uint32_t* a = even; + uint32_t* b = odd; + // apply lengthB zeros to crcA + for (; lengthB > 0; lengthB >>= 1) + { + // same as gf2_matrix_square(a, b); + for (uint32_t i = 0; i < CrcBits; i++) + { + uint32_t vec = b[i]; + a[i] = 0; + for (int j = 0; vec != 0; j++, vec >>= 1) + if (vec & 1) + a[i] ^= b[j]; + } + + // apply zeros operator for this bit + if (lengthB & 1) + { + // same as crcA = gf2_matrix_times(a, crcA); + uint32_t sum = 0; + for (int i = 0; crcA != 0; i++, crcA >>= 1) + if (crcA & 1) + sum ^= a[i]; + crcA = sum; + } + + // switch even and odd + uint32_t* t = a; a = b; b = t; + } + + // return combined crc + return crcA ^ crcB; +} + + +// ////////////////////////////////////////////////////////// +// constants + + +#ifndef NO_LUT +/// look-up table, already declared above +const uint32_t Crc32Lookup[MaxSlice][256] = +{ + //// same algorithm as crc32_bitwise + //for (int i = 0; i <= 0xFF; i++) + //{ + // uint32_t crc = i; + // for (int j = 0; j < 8; j++) + // crc = (crc >> 1) ^ ((crc & 1) * Polynomial); + // Crc32Lookup[0][i] = crc; + //} + 
//// ... and the following slicing-by-8 algorithm (from Intel): + //// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf + //// http://sourceforge.net/projects/slicing-by-8/ + //for (int slice = 1; slice < MaxSlice; slice++) + // Crc32Lookup[slice][i] = (Crc32Lookup[slice - 1][i] >> 8) ^ Crc32Lookup[0][Crc32Lookup[slice - 1][i] & 0xFF]; + { + // note: the first number of every second row corresponds to the half-byte look-up table ! + 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3, + 0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91, + 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7, + 0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5, + 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B, + 0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59, + 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F, + 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D, + 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433, + 0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01, + 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457, + 0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65, + 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB, + 0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9, + 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F, + 0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD, + 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683, + 0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1, + 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7, + 0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5, + 0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B, + 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79, + 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F, + 0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D, + 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713, + 0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21, + 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777, + 0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45, + 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB, + 0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9, + 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF, + 0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D, + } + +#if defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4) || defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8) || defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_16) + // beyond this point only 
relevant for Slicing-by-4, Slicing-by-8 and Slicing-by-16 + ,{ + 0x00000000,0x191B3141,0x32366282,0x2B2D53C3,0x646CC504,0x7D77F445,0x565AA786,0x4F4196C7, + 0xC8D98A08,0xD1C2BB49,0xFAEFE88A,0xE3F4D9CB,0xACB54F0C,0xB5AE7E4D,0x9E832D8E,0x87981CCF, + 0x4AC21251,0x53D92310,0x78F470D3,0x61EF4192,0x2EAED755,0x37B5E614,0x1C98B5D7,0x05838496, + 0x821B9859,0x9B00A918,0xB02DFADB,0xA936CB9A,0xE6775D5D,0xFF6C6C1C,0xD4413FDF,0xCD5A0E9E, + 0x958424A2,0x8C9F15E3,0xA7B24620,0xBEA97761,0xF1E8E1A6,0xE8F3D0E7,0xC3DE8324,0xDAC5B265, + 0x5D5DAEAA,0x44469FEB,0x6F6BCC28,0x7670FD69,0x39316BAE,0x202A5AEF,0x0B07092C,0x121C386D, + 0xDF4636F3,0xC65D07B2,0xED705471,0xF46B6530,0xBB2AF3F7,0xA231C2B6,0x891C9175,0x9007A034, + 0x179FBCFB,0x0E848DBA,0x25A9DE79,0x3CB2EF38,0x73F379FF,0x6AE848BE,0x41C51B7D,0x58DE2A3C, + 0xF0794F05,0xE9627E44,0xC24F2D87,0xDB541CC6,0x94158A01,0x8D0EBB40,0xA623E883,0xBF38D9C2, + 0x38A0C50D,0x21BBF44C,0x0A96A78F,0x138D96CE,0x5CCC0009,0x45D73148,0x6EFA628B,0x77E153CA, + 0xBABB5D54,0xA3A06C15,0x888D3FD6,0x91960E97,0xDED79850,0xC7CCA911,0xECE1FAD2,0xF5FACB93, + 0x7262D75C,0x6B79E61D,0x4054B5DE,0x594F849F,0x160E1258,0x0F152319,0x243870DA,0x3D23419B, + 0x65FD6BA7,0x7CE65AE6,0x57CB0925,0x4ED03864,0x0191AEA3,0x188A9FE2,0x33A7CC21,0x2ABCFD60, + 0xAD24E1AF,0xB43FD0EE,0x9F12832D,0x8609B26C,0xC94824AB,0xD05315EA,0xFB7E4629,0xE2657768, + 0x2F3F79F6,0x362448B7,0x1D091B74,0x04122A35,0x4B53BCF2,0x52488DB3,0x7965DE70,0x607EEF31, + 0xE7E6F3FE,0xFEFDC2BF,0xD5D0917C,0xCCCBA03D,0x838A36FA,0x9A9107BB,0xB1BC5478,0xA8A76539, + 0x3B83984B,0x2298A90A,0x09B5FAC9,0x10AECB88,0x5FEF5D4F,0x46F46C0E,0x6DD93FCD,0x74C20E8C, + 0xF35A1243,0xEA412302,0xC16C70C1,0xD8774180,0x9736D747,0x8E2DE606,0xA500B5C5,0xBC1B8484, + 0x71418A1A,0x685ABB5B,0x4377E898,0x5A6CD9D9,0x152D4F1E,0x0C367E5F,0x271B2D9C,0x3E001CDD, + 0xB9980012,0xA0833153,0x8BAE6290,0x92B553D1,0xDDF4C516,0xC4EFF457,0xEFC2A794,0xF6D996D5, + 0xAE07BCE9,0xB71C8DA8,0x9C31DE6B,0x852AEF2A,0xCA6B79ED,0xD37048AC,0xF85D1B6F,0xE1462A2E, + 0x66DE36E1,0x7FC507A0,0x54E85463,0x4DF36522,0x02B2F3E5,0x1BA9C2A4,0x30849167,0x299FA026, + 0xE4C5AEB8,0xFDDE9FF9,0xD6F3CC3A,0xCFE8FD7B,0x80A96BBC,0x99B25AFD,0xB29F093E,0xAB84387F, + 0x2C1C24B0,0x350715F1,0x1E2A4632,0x07317773,0x4870E1B4,0x516BD0F5,0x7A468336,0x635DB277, + 0xCBFAD74E,0xD2E1E60F,0xF9CCB5CC,0xE0D7848D,0xAF96124A,0xB68D230B,0x9DA070C8,0x84BB4189, + 0x03235D46,0x1A386C07,0x31153FC4,0x280E0E85,0x674F9842,0x7E54A903,0x5579FAC0,0x4C62CB81, + 0x8138C51F,0x9823F45E,0xB30EA79D,0xAA1596DC,0xE554001B,0xFC4F315A,0xD7626299,0xCE7953D8, + 0x49E14F17,0x50FA7E56,0x7BD72D95,0x62CC1CD4,0x2D8D8A13,0x3496BB52,0x1FBBE891,0x06A0D9D0, + 0x5E7EF3EC,0x4765C2AD,0x6C48916E,0x7553A02F,0x3A1236E8,0x230907A9,0x0824546A,0x113F652B, + 0x96A779E4,0x8FBC48A5,0xA4911B66,0xBD8A2A27,0xF2CBBCE0,0xEBD08DA1,0xC0FDDE62,0xD9E6EF23, + 0x14BCE1BD,0x0DA7D0FC,0x268A833F,0x3F91B27E,0x70D024B9,0x69CB15F8,0x42E6463B,0x5BFD777A, + 0xDC656BB5,0xC57E5AF4,0xEE530937,0xF7483876,0xB809AEB1,0xA1129FF0,0x8A3FCC33,0x9324FD72, + }, + + { + 0x00000000,0x01C26A37,0x0384D46E,0x0246BE59,0x0709A8DC,0x06CBC2EB,0x048D7CB2,0x054F1685, + 0x0E1351B8,0x0FD13B8F,0x0D9785D6,0x0C55EFE1,0x091AF964,0x08D89353,0x0A9E2D0A,0x0B5C473D, + 0x1C26A370,0x1DE4C947,0x1FA2771E,0x1E601D29,0x1B2F0BAC,0x1AED619B,0x18ABDFC2,0x1969B5F5, + 0x1235F2C8,0x13F798FF,0x11B126A6,0x10734C91,0x153C5A14,0x14FE3023,0x16B88E7A,0x177AE44D, + 0x384D46E0,0x398F2CD7,0x3BC9928E,0x3A0BF8B9,0x3F44EE3C,0x3E86840B,0x3CC03A52,0x3D025065, + 0x365E1758,0x379C7D6F,0x35DAC336,0x3418A901,0x3157BF84,0x3095D5B3,0x32D36BEA,0x331101DD, + 
0x246BE590,0x25A98FA7,0x27EF31FE,0x262D5BC9,0x23624D4C,0x22A0277B,0x20E69922,0x2124F315, + 0x2A78B428,0x2BBADE1F,0x29FC6046,0x283E0A71,0x2D711CF4,0x2CB376C3,0x2EF5C89A,0x2F37A2AD, + 0x709A8DC0,0x7158E7F7,0x731E59AE,0x72DC3399,0x7793251C,0x76514F2B,0x7417F172,0x75D59B45, + 0x7E89DC78,0x7F4BB64F,0x7D0D0816,0x7CCF6221,0x798074A4,0x78421E93,0x7A04A0CA,0x7BC6CAFD, + 0x6CBC2EB0,0x6D7E4487,0x6F38FADE,0x6EFA90E9,0x6BB5866C,0x6A77EC5B,0x68315202,0x69F33835, + 0x62AF7F08,0x636D153F,0x612BAB66,0x60E9C151,0x65A6D7D4,0x6464BDE3,0x662203BA,0x67E0698D, + 0x48D7CB20,0x4915A117,0x4B531F4E,0x4A917579,0x4FDE63FC,0x4E1C09CB,0x4C5AB792,0x4D98DDA5, + 0x46C49A98,0x4706F0AF,0x45404EF6,0x448224C1,0x41CD3244,0x400F5873,0x4249E62A,0x438B8C1D, + 0x54F16850,0x55330267,0x5775BC3E,0x56B7D609,0x53F8C08C,0x523AAABB,0x507C14E2,0x51BE7ED5, + 0x5AE239E8,0x5B2053DF,0x5966ED86,0x58A487B1,0x5DEB9134,0x5C29FB03,0x5E6F455A,0x5FAD2F6D, + 0xE1351B80,0xE0F771B7,0xE2B1CFEE,0xE373A5D9,0xE63CB35C,0xE7FED96B,0xE5B86732,0xE47A0D05, + 0xEF264A38,0xEEE4200F,0xECA29E56,0xED60F461,0xE82FE2E4,0xE9ED88D3,0xEBAB368A,0xEA695CBD, + 0xFD13B8F0,0xFCD1D2C7,0xFE976C9E,0xFF5506A9,0xFA1A102C,0xFBD87A1B,0xF99EC442,0xF85CAE75, + 0xF300E948,0xF2C2837F,0xF0843D26,0xF1465711,0xF4094194,0xF5CB2BA3,0xF78D95FA,0xF64FFFCD, + 0xD9785D60,0xD8BA3757,0xDAFC890E,0xDB3EE339,0xDE71F5BC,0xDFB39F8B,0xDDF521D2,0xDC374BE5, + 0xD76B0CD8,0xD6A966EF,0xD4EFD8B6,0xD52DB281,0xD062A404,0xD1A0CE33,0xD3E6706A,0xD2241A5D, + 0xC55EFE10,0xC49C9427,0xC6DA2A7E,0xC7184049,0xC25756CC,0xC3953CFB,0xC1D382A2,0xC011E895, + 0xCB4DAFA8,0xCA8FC59F,0xC8C97BC6,0xC90B11F1,0xCC440774,0xCD866D43,0xCFC0D31A,0xCE02B92D, + 0x91AF9640,0x906DFC77,0x922B422E,0x93E92819,0x96A63E9C,0x976454AB,0x9522EAF2,0x94E080C5, + 0x9FBCC7F8,0x9E7EADCF,0x9C381396,0x9DFA79A1,0x98B56F24,0x99770513,0x9B31BB4A,0x9AF3D17D, + 0x8D893530,0x8C4B5F07,0x8E0DE15E,0x8FCF8B69,0x8A809DEC,0x8B42F7DB,0x89044982,0x88C623B5, + 0x839A6488,0x82580EBF,0x801EB0E6,0x81DCDAD1,0x8493CC54,0x8551A663,0x8717183A,0x86D5720D, + 0xA9E2D0A0,0xA820BA97,0xAA6604CE,0xABA46EF9,0xAEEB787C,0xAF29124B,0xAD6FAC12,0xACADC625, + 0xA7F18118,0xA633EB2F,0xA4755576,0xA5B73F41,0xA0F829C4,0xA13A43F3,0xA37CFDAA,0xA2BE979D, + 0xB5C473D0,0xB40619E7,0xB640A7BE,0xB782CD89,0xB2CDDB0C,0xB30FB13B,0xB1490F62,0xB08B6555, + 0xBBD72268,0xBA15485F,0xB853F606,0xB9919C31,0xBCDE8AB4,0xBD1CE083,0xBF5A5EDA,0xBE9834ED, + }, + + { + 0x00000000,0xB8BC6765,0xAA09C88B,0x12B5AFEE,0x8F629757,0x37DEF032,0x256B5FDC,0x9DD738B9, + 0xC5B428EF,0x7D084F8A,0x6FBDE064,0xD7018701,0x4AD6BFB8,0xF26AD8DD,0xE0DF7733,0x58631056, + 0x5019579F,0xE8A530FA,0xFA109F14,0x42ACF871,0xDF7BC0C8,0x67C7A7AD,0x75720843,0xCDCE6F26, + 0x95AD7F70,0x2D111815,0x3FA4B7FB,0x8718D09E,0x1ACFE827,0xA2738F42,0xB0C620AC,0x087A47C9, + 0xA032AF3E,0x188EC85B,0x0A3B67B5,0xB28700D0,0x2F503869,0x97EC5F0C,0x8559F0E2,0x3DE59787, + 0x658687D1,0xDD3AE0B4,0xCF8F4F5A,0x7733283F,0xEAE41086,0x525877E3,0x40EDD80D,0xF851BF68, + 0xF02BF8A1,0x48979FC4,0x5A22302A,0xE29E574F,0x7F496FF6,0xC7F50893,0xD540A77D,0x6DFCC018, + 0x359FD04E,0x8D23B72B,0x9F9618C5,0x272A7FA0,0xBAFD4719,0x0241207C,0x10F48F92,0xA848E8F7, + 0x9B14583D,0x23A83F58,0x311D90B6,0x89A1F7D3,0x1476CF6A,0xACCAA80F,0xBE7F07E1,0x06C36084, + 0x5EA070D2,0xE61C17B7,0xF4A9B859,0x4C15DF3C,0xD1C2E785,0x697E80E0,0x7BCB2F0E,0xC377486B, + 0xCB0D0FA2,0x73B168C7,0x6104C729,0xD9B8A04C,0x446F98F5,0xFCD3FF90,0xEE66507E,0x56DA371B, + 0x0EB9274D,0xB6054028,0xA4B0EFC6,0x1C0C88A3,0x81DBB01A,0x3967D77F,0x2BD27891,0x936E1FF4, + 
0x3B26F703,0x839A9066,0x912F3F88,0x299358ED,0xB4446054,0x0CF80731,0x1E4DA8DF,0xA6F1CFBA, + 0xFE92DFEC,0x462EB889,0x549B1767,0xEC277002,0x71F048BB,0xC94C2FDE,0xDBF98030,0x6345E755, + 0x6B3FA09C,0xD383C7F9,0xC1366817,0x798A0F72,0xE45D37CB,0x5CE150AE,0x4E54FF40,0xF6E89825, + 0xAE8B8873,0x1637EF16,0x048240F8,0xBC3E279D,0x21E91F24,0x99557841,0x8BE0D7AF,0x335CB0CA, + 0xED59B63B,0x55E5D15E,0x47507EB0,0xFFEC19D5,0x623B216C,0xDA874609,0xC832E9E7,0x708E8E82, + 0x28ED9ED4,0x9051F9B1,0x82E4565F,0x3A58313A,0xA78F0983,0x1F336EE6,0x0D86C108,0xB53AA66D, + 0xBD40E1A4,0x05FC86C1,0x1749292F,0xAFF54E4A,0x322276F3,0x8A9E1196,0x982BBE78,0x2097D91D, + 0x78F4C94B,0xC048AE2E,0xD2FD01C0,0x6A4166A5,0xF7965E1C,0x4F2A3979,0x5D9F9697,0xE523F1F2, + 0x4D6B1905,0xF5D77E60,0xE762D18E,0x5FDEB6EB,0xC2098E52,0x7AB5E937,0x680046D9,0xD0BC21BC, + 0x88DF31EA,0x3063568F,0x22D6F961,0x9A6A9E04,0x07BDA6BD,0xBF01C1D8,0xADB46E36,0x15080953, + 0x1D724E9A,0xA5CE29FF,0xB77B8611,0x0FC7E174,0x9210D9CD,0x2AACBEA8,0x38191146,0x80A57623, + 0xD8C66675,0x607A0110,0x72CFAEFE,0xCA73C99B,0x57A4F122,0xEF189647,0xFDAD39A9,0x45115ECC, + 0x764DEE06,0xCEF18963,0xDC44268D,0x64F841E8,0xF92F7951,0x41931E34,0x5326B1DA,0xEB9AD6BF, + 0xB3F9C6E9,0x0B45A18C,0x19F00E62,0xA14C6907,0x3C9B51BE,0x842736DB,0x96929935,0x2E2EFE50, + 0x2654B999,0x9EE8DEFC,0x8C5D7112,0x34E11677,0xA9362ECE,0x118A49AB,0x033FE645,0xBB838120, + 0xE3E09176,0x5B5CF613,0x49E959FD,0xF1553E98,0x6C820621,0xD43E6144,0xC68BCEAA,0x7E37A9CF, + 0xD67F4138,0x6EC3265D,0x7C7689B3,0xC4CAEED6,0x591DD66F,0xE1A1B10A,0xF3141EE4,0x4BA87981, + 0x13CB69D7,0xAB770EB2,0xB9C2A15C,0x017EC639,0x9CA9FE80,0x241599E5,0x36A0360B,0x8E1C516E, + 0x866616A7,0x3EDA71C2,0x2C6FDE2C,0x94D3B949,0x090481F0,0xB1B8E695,0xA30D497B,0x1BB12E1E, + 0x43D23E48,0xFB6E592D,0xE9DBF6C3,0x516791A6,0xCCB0A91F,0x740CCE7A,0x66B96194,0xDE0506F1, + } +#endif // defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4) || defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8) || defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_16) +#if defined (CRC32_USE_LOOKUP_TABLE_SLICING_BY_8) || defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_16) + // beyond this point only relevant for Slicing-by-8 and Slicing-by-16 + ,{ + 0x00000000,0x3D6029B0,0x7AC05360,0x47A07AD0,0xF580A6C0,0xC8E08F70,0x8F40F5A0,0xB220DC10, + 0x30704BC1,0x0D106271,0x4AB018A1,0x77D03111,0xC5F0ED01,0xF890C4B1,0xBF30BE61,0x825097D1, + 0x60E09782,0x5D80BE32,0x1A20C4E2,0x2740ED52,0x95603142,0xA80018F2,0xEFA06222,0xD2C04B92, + 0x5090DC43,0x6DF0F5F3,0x2A508F23,0x1730A693,0xA5107A83,0x98705333,0xDFD029E3,0xE2B00053, + 0xC1C12F04,0xFCA106B4,0xBB017C64,0x866155D4,0x344189C4,0x0921A074,0x4E81DAA4,0x73E1F314, + 0xF1B164C5,0xCCD14D75,0x8B7137A5,0xB6111E15,0x0431C205,0x3951EBB5,0x7EF19165,0x4391B8D5, + 0xA121B886,0x9C419136,0xDBE1EBE6,0xE681C256,0x54A11E46,0x69C137F6,0x2E614D26,0x13016496, + 0x9151F347,0xAC31DAF7,0xEB91A027,0xD6F18997,0x64D15587,0x59B17C37,0x1E1106E7,0x23712F57, + 0x58F35849,0x659371F9,0x22330B29,0x1F532299,0xAD73FE89,0x9013D739,0xD7B3ADE9,0xEAD38459, + 0x68831388,0x55E33A38,0x124340E8,0x2F236958,0x9D03B548,0xA0639CF8,0xE7C3E628,0xDAA3CF98, + 0x3813CFCB,0x0573E67B,0x42D39CAB,0x7FB3B51B,0xCD93690B,0xF0F340BB,0xB7533A6B,0x8A3313DB, + 0x0863840A,0x3503ADBA,0x72A3D76A,0x4FC3FEDA,0xFDE322CA,0xC0830B7A,0x872371AA,0xBA43581A, + 0x9932774D,0xA4525EFD,0xE3F2242D,0xDE920D9D,0x6CB2D18D,0x51D2F83D,0x167282ED,0x2B12AB5D, + 0xA9423C8C,0x9422153C,0xD3826FEC,0xEEE2465C,0x5CC29A4C,0x61A2B3FC,0x2602C92C,0x1B62E09C, + 0xF9D2E0CF,0xC4B2C97F,0x8312B3AF,0xBE729A1F,0x0C52460F,0x31326FBF,0x7692156F,0x4BF23CDF, + 
0xC9A2AB0E,0xF4C282BE,0xB362F86E,0x8E02D1DE,0x3C220DCE,0x0142247E,0x46E25EAE,0x7B82771E, + 0xB1E6B092,0x8C869922,0xCB26E3F2,0xF646CA42,0x44661652,0x79063FE2,0x3EA64532,0x03C66C82, + 0x8196FB53,0xBCF6D2E3,0xFB56A833,0xC6368183,0x74165D93,0x49767423,0x0ED60EF3,0x33B62743, + 0xD1062710,0xEC660EA0,0xABC67470,0x96A65DC0,0x248681D0,0x19E6A860,0x5E46D2B0,0x6326FB00, + 0xE1766CD1,0xDC164561,0x9BB63FB1,0xA6D61601,0x14F6CA11,0x2996E3A1,0x6E369971,0x5356B0C1, + 0x70279F96,0x4D47B626,0x0AE7CCF6,0x3787E546,0x85A73956,0xB8C710E6,0xFF676A36,0xC2074386, + 0x4057D457,0x7D37FDE7,0x3A978737,0x07F7AE87,0xB5D77297,0x88B75B27,0xCF1721F7,0xF2770847, + 0x10C70814,0x2DA721A4,0x6A075B74,0x576772C4,0xE547AED4,0xD8278764,0x9F87FDB4,0xA2E7D404, + 0x20B743D5,0x1DD76A65,0x5A7710B5,0x67173905,0xD537E515,0xE857CCA5,0xAFF7B675,0x92979FC5, + 0xE915E8DB,0xD475C16B,0x93D5BBBB,0xAEB5920B,0x1C954E1B,0x21F567AB,0x66551D7B,0x5B3534CB, + 0xD965A31A,0xE4058AAA,0xA3A5F07A,0x9EC5D9CA,0x2CE505DA,0x11852C6A,0x562556BA,0x6B457F0A, + 0x89F57F59,0xB49556E9,0xF3352C39,0xCE550589,0x7C75D999,0x4115F029,0x06B58AF9,0x3BD5A349, + 0xB9853498,0x84E51D28,0xC34567F8,0xFE254E48,0x4C059258,0x7165BBE8,0x36C5C138,0x0BA5E888, + 0x28D4C7DF,0x15B4EE6F,0x521494BF,0x6F74BD0F,0xDD54611F,0xE03448AF,0xA794327F,0x9AF41BCF, + 0x18A48C1E,0x25C4A5AE,0x6264DF7E,0x5F04F6CE,0xED242ADE,0xD044036E,0x97E479BE,0xAA84500E, + 0x4834505D,0x755479ED,0x32F4033D,0x0F942A8D,0xBDB4F69D,0x80D4DF2D,0xC774A5FD,0xFA148C4D, + 0x78441B9C,0x4524322C,0x028448FC,0x3FE4614C,0x8DC4BD5C,0xB0A494EC,0xF704EE3C,0xCA64C78C, + }, + + { + 0x00000000,0xCB5CD3A5,0x4DC8A10B,0x869472AE,0x9B914216,0x50CD91B3,0xD659E31D,0x1D0530B8, + 0xEC53826D,0x270F51C8,0xA19B2366,0x6AC7F0C3,0x77C2C07B,0xBC9E13DE,0x3A0A6170,0xF156B2D5, + 0x03D6029B,0xC88AD13E,0x4E1EA390,0x85427035,0x9847408D,0x531B9328,0xD58FE186,0x1ED33223, + 0xEF8580F6,0x24D95353,0xA24D21FD,0x6911F258,0x7414C2E0,0xBF481145,0x39DC63EB,0xF280B04E, + 0x07AC0536,0xCCF0D693,0x4A64A43D,0x81387798,0x9C3D4720,0x57619485,0xD1F5E62B,0x1AA9358E, + 0xEBFF875B,0x20A354FE,0xA6372650,0x6D6BF5F5,0x706EC54D,0xBB3216E8,0x3DA66446,0xF6FAB7E3, + 0x047A07AD,0xCF26D408,0x49B2A6A6,0x82EE7503,0x9FEB45BB,0x54B7961E,0xD223E4B0,0x197F3715, + 0xE82985C0,0x23755665,0xA5E124CB,0x6EBDF76E,0x73B8C7D6,0xB8E41473,0x3E7066DD,0xF52CB578, + 0x0F580A6C,0xC404D9C9,0x4290AB67,0x89CC78C2,0x94C9487A,0x5F959BDF,0xD901E971,0x125D3AD4, + 0xE30B8801,0x28575BA4,0xAEC3290A,0x659FFAAF,0x789ACA17,0xB3C619B2,0x35526B1C,0xFE0EB8B9, + 0x0C8E08F7,0xC7D2DB52,0x4146A9FC,0x8A1A7A59,0x971F4AE1,0x5C439944,0xDAD7EBEA,0x118B384F, + 0xE0DD8A9A,0x2B81593F,0xAD152B91,0x6649F834,0x7B4CC88C,0xB0101B29,0x36846987,0xFDD8BA22, + 0x08F40F5A,0xC3A8DCFF,0x453CAE51,0x8E607DF4,0x93654D4C,0x58399EE9,0xDEADEC47,0x15F13FE2, + 0xE4A78D37,0x2FFB5E92,0xA96F2C3C,0x6233FF99,0x7F36CF21,0xB46A1C84,0x32FE6E2A,0xF9A2BD8F, + 0x0B220DC1,0xC07EDE64,0x46EAACCA,0x8DB67F6F,0x90B34FD7,0x5BEF9C72,0xDD7BEEDC,0x16273D79, + 0xE7718FAC,0x2C2D5C09,0xAAB92EA7,0x61E5FD02,0x7CE0CDBA,0xB7BC1E1F,0x31286CB1,0xFA74BF14, + 0x1EB014D8,0xD5ECC77D,0x5378B5D3,0x98246676,0x852156CE,0x4E7D856B,0xC8E9F7C5,0x03B52460, + 0xF2E396B5,0x39BF4510,0xBF2B37BE,0x7477E41B,0x6972D4A3,0xA22E0706,0x24BA75A8,0xEFE6A60D, + 0x1D661643,0xD63AC5E6,0x50AEB748,0x9BF264ED,0x86F75455,0x4DAB87F0,0xCB3FF55E,0x006326FB, + 0xF135942E,0x3A69478B,0xBCFD3525,0x77A1E680,0x6AA4D638,0xA1F8059D,0x276C7733,0xEC30A496, + 0x191C11EE,0xD240C24B,0x54D4B0E5,0x9F886340,0x828D53F8,0x49D1805D,0xCF45F2F3,0x04192156, + 
0xF54F9383,0x3E134026,0xB8873288,0x73DBE12D,0x6EDED195,0xA5820230,0x2316709E,0xE84AA33B, + 0x1ACA1375,0xD196C0D0,0x5702B27E,0x9C5E61DB,0x815B5163,0x4A0782C6,0xCC93F068,0x07CF23CD, + 0xF6999118,0x3DC542BD,0xBB513013,0x700DE3B6,0x6D08D30E,0xA65400AB,0x20C07205,0xEB9CA1A0, + 0x11E81EB4,0xDAB4CD11,0x5C20BFBF,0x977C6C1A,0x8A795CA2,0x41258F07,0xC7B1FDA9,0x0CED2E0C, + 0xFDBB9CD9,0x36E74F7C,0xB0733DD2,0x7B2FEE77,0x662ADECF,0xAD760D6A,0x2BE27FC4,0xE0BEAC61, + 0x123E1C2F,0xD962CF8A,0x5FF6BD24,0x94AA6E81,0x89AF5E39,0x42F38D9C,0xC467FF32,0x0F3B2C97, + 0xFE6D9E42,0x35314DE7,0xB3A53F49,0x78F9ECEC,0x65FCDC54,0xAEA00FF1,0x28347D5F,0xE368AEFA, + 0x16441B82,0xDD18C827,0x5B8CBA89,0x90D0692C,0x8DD55994,0x46898A31,0xC01DF89F,0x0B412B3A, + 0xFA1799EF,0x314B4A4A,0xB7DF38E4,0x7C83EB41,0x6186DBF9,0xAADA085C,0x2C4E7AF2,0xE712A957, + 0x15921919,0xDECECABC,0x585AB812,0x93066BB7,0x8E035B0F,0x455F88AA,0xC3CBFA04,0x089729A1, + 0xF9C19B74,0x329D48D1,0xB4093A7F,0x7F55E9DA,0x6250D962,0xA90C0AC7,0x2F987869,0xE4C4ABCC, + }, + + { + 0x00000000,0xA6770BB4,0x979F1129,0x31E81A9D,0xF44F2413,0x52382FA7,0x63D0353A,0xC5A73E8E, + 0x33EF4E67,0x959845D3,0xA4705F4E,0x020754FA,0xC7A06A74,0x61D761C0,0x503F7B5D,0xF64870E9, + 0x67DE9CCE,0xC1A9977A,0xF0418DE7,0x56368653,0x9391B8DD,0x35E6B369,0x040EA9F4,0xA279A240, + 0x5431D2A9,0xF246D91D,0xC3AEC380,0x65D9C834,0xA07EF6BA,0x0609FD0E,0x37E1E793,0x9196EC27, + 0xCFBD399C,0x69CA3228,0x582228B5,0xFE552301,0x3BF21D8F,0x9D85163B,0xAC6D0CA6,0x0A1A0712, + 0xFC5277FB,0x5A257C4F,0x6BCD66D2,0xCDBA6D66,0x081D53E8,0xAE6A585C,0x9F8242C1,0x39F54975, + 0xA863A552,0x0E14AEE6,0x3FFCB47B,0x998BBFCF,0x5C2C8141,0xFA5B8AF5,0xCBB39068,0x6DC49BDC, + 0x9B8CEB35,0x3DFBE081,0x0C13FA1C,0xAA64F1A8,0x6FC3CF26,0xC9B4C492,0xF85CDE0F,0x5E2BD5BB, + 0x440B7579,0xE27C7ECD,0xD3946450,0x75E36FE4,0xB044516A,0x16335ADE,0x27DB4043,0x81AC4BF7, + 0x77E43B1E,0xD19330AA,0xE07B2A37,0x460C2183,0x83AB1F0D,0x25DC14B9,0x14340E24,0xB2430590, + 0x23D5E9B7,0x85A2E203,0xB44AF89E,0x123DF32A,0xD79ACDA4,0x71EDC610,0x4005DC8D,0xE672D739, + 0x103AA7D0,0xB64DAC64,0x87A5B6F9,0x21D2BD4D,0xE47583C3,0x42028877,0x73EA92EA,0xD59D995E, + 0x8BB64CE5,0x2DC14751,0x1C295DCC,0xBA5E5678,0x7FF968F6,0xD98E6342,0xE86679DF,0x4E11726B, + 0xB8590282,0x1E2E0936,0x2FC613AB,0x89B1181F,0x4C162691,0xEA612D25,0xDB8937B8,0x7DFE3C0C, + 0xEC68D02B,0x4A1FDB9F,0x7BF7C102,0xDD80CAB6,0x1827F438,0xBE50FF8C,0x8FB8E511,0x29CFEEA5, + 0xDF879E4C,0x79F095F8,0x48188F65,0xEE6F84D1,0x2BC8BA5F,0x8DBFB1EB,0xBC57AB76,0x1A20A0C2, + 0x8816EAF2,0x2E61E146,0x1F89FBDB,0xB9FEF06F,0x7C59CEE1,0xDA2EC555,0xEBC6DFC8,0x4DB1D47C, + 0xBBF9A495,0x1D8EAF21,0x2C66B5BC,0x8A11BE08,0x4FB68086,0xE9C18B32,0xD82991AF,0x7E5E9A1B, + 0xEFC8763C,0x49BF7D88,0x78576715,0xDE206CA1,0x1B87522F,0xBDF0599B,0x8C184306,0x2A6F48B2, + 0xDC27385B,0x7A5033EF,0x4BB82972,0xEDCF22C6,0x28681C48,0x8E1F17FC,0xBFF70D61,0x198006D5, + 0x47ABD36E,0xE1DCD8DA,0xD034C247,0x7643C9F3,0xB3E4F77D,0x1593FCC9,0x247BE654,0x820CEDE0, + 0x74449D09,0xD23396BD,0xE3DB8C20,0x45AC8794,0x800BB91A,0x267CB2AE,0x1794A833,0xB1E3A387, + 0x20754FA0,0x86024414,0xB7EA5E89,0x119D553D,0xD43A6BB3,0x724D6007,0x43A57A9A,0xE5D2712E, + 0x139A01C7,0xB5ED0A73,0x840510EE,0x22721B5A,0xE7D525D4,0x41A22E60,0x704A34FD,0xD63D3F49, + 0xCC1D9F8B,0x6A6A943F,0x5B828EA2,0xFDF58516,0x3852BB98,0x9E25B02C,0xAFCDAAB1,0x09BAA105, + 0xFFF2D1EC,0x5985DA58,0x686DC0C5,0xCE1ACB71,0x0BBDF5FF,0xADCAFE4B,0x9C22E4D6,0x3A55EF62, + 0xABC30345,0x0DB408F1,0x3C5C126C,0x9A2B19D8,0x5F8C2756,0xF9FB2CE2,0xC813367F,0x6E643DCB, + 
0x982C4D22,0x3E5B4696,0x0FB35C0B,0xA9C457BF,0x6C636931,0xCA146285,0xFBFC7818,0x5D8B73AC, + 0x03A0A617,0xA5D7ADA3,0x943FB73E,0x3248BC8A,0xF7EF8204,0x519889B0,0x6070932D,0xC6079899, + 0x304FE870,0x9638E3C4,0xA7D0F959,0x01A7F2ED,0xC400CC63,0x6277C7D7,0x539FDD4A,0xF5E8D6FE, + 0x647E3AD9,0xC209316D,0xF3E12BF0,0x55962044,0x90311ECA,0x3646157E,0x07AE0FE3,0xA1D90457, + 0x579174BE,0xF1E67F0A,0xC00E6597,0x66796E23,0xA3DE50AD,0x05A95B19,0x34414184,0x92364A30, + }, + + { + 0x00000000,0xCCAA009E,0x4225077D,0x8E8F07E3,0x844A0EFA,0x48E00E64,0xC66F0987,0x0AC50919, + 0xD3E51BB5,0x1F4F1B2B,0x91C01CC8,0x5D6A1C56,0x57AF154F,0x9B0515D1,0x158A1232,0xD92012AC, + 0x7CBB312B,0xB01131B5,0x3E9E3656,0xF23436C8,0xF8F13FD1,0x345B3F4F,0xBAD438AC,0x767E3832, + 0xAF5E2A9E,0x63F42A00,0xED7B2DE3,0x21D12D7D,0x2B142464,0xE7BE24FA,0x69312319,0xA59B2387, + 0xF9766256,0x35DC62C8,0xBB53652B,0x77F965B5,0x7D3C6CAC,0xB1966C32,0x3F196BD1,0xF3B36B4F, + 0x2A9379E3,0xE639797D,0x68B67E9E,0xA41C7E00,0xAED97719,0x62737787,0xECFC7064,0x205670FA, + 0x85CD537D,0x496753E3,0xC7E85400,0x0B42549E,0x01875D87,0xCD2D5D19,0x43A25AFA,0x8F085A64, + 0x562848C8,0x9A824856,0x140D4FB5,0xD8A74F2B,0xD2624632,0x1EC846AC,0x9047414F,0x5CED41D1, + 0x299DC2ED,0xE537C273,0x6BB8C590,0xA712C50E,0xADD7CC17,0x617DCC89,0xEFF2CB6A,0x2358CBF4, + 0xFA78D958,0x36D2D9C6,0xB85DDE25,0x74F7DEBB,0x7E32D7A2,0xB298D73C,0x3C17D0DF,0xF0BDD041, + 0x5526F3C6,0x998CF358,0x1703F4BB,0xDBA9F425,0xD16CFD3C,0x1DC6FDA2,0x9349FA41,0x5FE3FADF, + 0x86C3E873,0x4A69E8ED,0xC4E6EF0E,0x084CEF90,0x0289E689,0xCE23E617,0x40ACE1F4,0x8C06E16A, + 0xD0EBA0BB,0x1C41A025,0x92CEA7C6,0x5E64A758,0x54A1AE41,0x980BAEDF,0x1684A93C,0xDA2EA9A2, + 0x030EBB0E,0xCFA4BB90,0x412BBC73,0x8D81BCED,0x8744B5F4,0x4BEEB56A,0xC561B289,0x09CBB217, + 0xAC509190,0x60FA910E,0xEE7596ED,0x22DF9673,0x281A9F6A,0xE4B09FF4,0x6A3F9817,0xA6959889, + 0x7FB58A25,0xB31F8ABB,0x3D908D58,0xF13A8DC6,0xFBFF84DF,0x37558441,0xB9DA83A2,0x7570833C, + 0x533B85DA,0x9F918544,0x111E82A7,0xDDB48239,0xD7718B20,0x1BDB8BBE,0x95548C5D,0x59FE8CC3, + 0x80DE9E6F,0x4C749EF1,0xC2FB9912,0x0E51998C,0x04949095,0xC83E900B,0x46B197E8,0x8A1B9776, + 0x2F80B4F1,0xE32AB46F,0x6DA5B38C,0xA10FB312,0xABCABA0B,0x6760BA95,0xE9EFBD76,0x2545BDE8, + 0xFC65AF44,0x30CFAFDA,0xBE40A839,0x72EAA8A7,0x782FA1BE,0xB485A120,0x3A0AA6C3,0xF6A0A65D, + 0xAA4DE78C,0x66E7E712,0xE868E0F1,0x24C2E06F,0x2E07E976,0xE2ADE9E8,0x6C22EE0B,0xA088EE95, + 0x79A8FC39,0xB502FCA7,0x3B8DFB44,0xF727FBDA,0xFDE2F2C3,0x3148F25D,0xBFC7F5BE,0x736DF520, + 0xD6F6D6A7,0x1A5CD639,0x94D3D1DA,0x5879D144,0x52BCD85D,0x9E16D8C3,0x1099DF20,0xDC33DFBE, + 0x0513CD12,0xC9B9CD8C,0x4736CA6F,0x8B9CCAF1,0x8159C3E8,0x4DF3C376,0xC37CC495,0x0FD6C40B, + 0x7AA64737,0xB60C47A9,0x3883404A,0xF42940D4,0xFEEC49CD,0x32464953,0xBCC94EB0,0x70634E2E, + 0xA9435C82,0x65E95C1C,0xEB665BFF,0x27CC5B61,0x2D095278,0xE1A352E6,0x6F2C5505,0xA386559B, + 0x061D761C,0xCAB77682,0x44387161,0x889271FF,0x825778E6,0x4EFD7878,0xC0727F9B,0x0CD87F05, + 0xD5F86DA9,0x19526D37,0x97DD6AD4,0x5B776A4A,0x51B26353,0x9D1863CD,0x1397642E,0xDF3D64B0, + 0x83D02561,0x4F7A25FF,0xC1F5221C,0x0D5F2282,0x079A2B9B,0xCB302B05,0x45BF2CE6,0x89152C78, + 0x50353ED4,0x9C9F3E4A,0x121039A9,0xDEBA3937,0xD47F302E,0x18D530B0,0x965A3753,0x5AF037CD, + 0xFF6B144A,0x33C114D4,0xBD4E1337,0x71E413A9,0x7B211AB0,0xB78B1A2E,0x39041DCD,0xF5AE1D53, + 0x2C8E0FFF,0xE0240F61,0x6EAB0882,0xA201081C,0xA8C40105,0x646E019B,0xEAE10678,0x264B06E6, + } +#endif // CRC32_USE_LOOKUP_TABLE_SLICING_BY_8 || CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 +#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 + // beyond this point only relevant for 
Slicing-by-16 + ,{ + 0x00000000,0x177B1443,0x2EF62886,0x398D3CC5,0x5DEC510C,0x4A97454F,0x731A798A,0x64616DC9, + 0xBBD8A218,0xACA3B65B,0x952E8A9E,0x82559EDD,0xE634F314,0xF14FE757,0xC8C2DB92,0xDFB9CFD1, + 0xACC04271,0xBBBB5632,0x82366AF7,0x954D7EB4,0xF12C137D,0xE657073E,0xDFDA3BFB,0xC8A12FB8, + 0x1718E069,0x0063F42A,0x39EEC8EF,0x2E95DCAC,0x4AF4B165,0x5D8FA526,0x640299E3,0x73798DA0, + 0x82F182A3,0x958A96E0,0xAC07AA25,0xBB7CBE66,0xDF1DD3AF,0xC866C7EC,0xF1EBFB29,0xE690EF6A, + 0x392920BB,0x2E5234F8,0x17DF083D,0x00A41C7E,0x64C571B7,0x73BE65F4,0x4A335931,0x5D484D72, + 0x2E31C0D2,0x394AD491,0x00C7E854,0x17BCFC17,0x73DD91DE,0x64A6859D,0x5D2BB958,0x4A50AD1B, + 0x95E962CA,0x82927689,0xBB1F4A4C,0xAC645E0F,0xC80533C6,0xDF7E2785,0xE6F31B40,0xF1880F03, + 0xDE920307,0xC9E91744,0xF0642B81,0xE71F3FC2,0x837E520B,0x94054648,0xAD887A8D,0xBAF36ECE, + 0x654AA11F,0x7231B55C,0x4BBC8999,0x5CC79DDA,0x38A6F013,0x2FDDE450,0x1650D895,0x012BCCD6, + 0x72524176,0x65295535,0x5CA469F0,0x4BDF7DB3,0x2FBE107A,0x38C50439,0x014838FC,0x16332CBF, + 0xC98AE36E,0xDEF1F72D,0xE77CCBE8,0xF007DFAB,0x9466B262,0x831DA621,0xBA909AE4,0xADEB8EA7, + 0x5C6381A4,0x4B1895E7,0x7295A922,0x65EEBD61,0x018FD0A8,0x16F4C4EB,0x2F79F82E,0x3802EC6D, + 0xE7BB23BC,0xF0C037FF,0xC94D0B3A,0xDE361F79,0xBA5772B0,0xAD2C66F3,0x94A15A36,0x83DA4E75, + 0xF0A3C3D5,0xE7D8D796,0xDE55EB53,0xC92EFF10,0xAD4F92D9,0xBA34869A,0x83B9BA5F,0x94C2AE1C, + 0x4B7B61CD,0x5C00758E,0x658D494B,0x72F65D08,0x169730C1,0x01EC2482,0x38611847,0x2F1A0C04, + 0x6655004F,0x712E140C,0x48A328C9,0x5FD83C8A,0x3BB95143,0x2CC24500,0x154F79C5,0x02346D86, + 0xDD8DA257,0xCAF6B614,0xF37B8AD1,0xE4009E92,0x8061F35B,0x971AE718,0xAE97DBDD,0xB9ECCF9E, + 0xCA95423E,0xDDEE567D,0xE4636AB8,0xF3187EFB,0x97791332,0x80020771,0xB98F3BB4,0xAEF42FF7, + 0x714DE026,0x6636F465,0x5FBBC8A0,0x48C0DCE3,0x2CA1B12A,0x3BDAA569,0x025799AC,0x152C8DEF, + 0xE4A482EC,0xF3DF96AF,0xCA52AA6A,0xDD29BE29,0xB948D3E0,0xAE33C7A3,0x97BEFB66,0x80C5EF25, + 0x5F7C20F4,0x480734B7,0x718A0872,0x66F11C31,0x029071F8,0x15EB65BB,0x2C66597E,0x3B1D4D3D, + 0x4864C09D,0x5F1FD4DE,0x6692E81B,0x71E9FC58,0x15889191,0x02F385D2,0x3B7EB917,0x2C05AD54, + 0xF3BC6285,0xE4C776C6,0xDD4A4A03,0xCA315E40,0xAE503389,0xB92B27CA,0x80A61B0F,0x97DD0F4C, + 0xB8C70348,0xAFBC170B,0x96312BCE,0x814A3F8D,0xE52B5244,0xF2504607,0xCBDD7AC2,0xDCA66E81, + 0x031FA150,0x1464B513,0x2DE989D6,0x3A929D95,0x5EF3F05C,0x4988E41F,0x7005D8DA,0x677ECC99, + 0x14074139,0x037C557A,0x3AF169BF,0x2D8A7DFC,0x49EB1035,0x5E900476,0x671D38B3,0x70662CF0, + 0xAFDFE321,0xB8A4F762,0x8129CBA7,0x9652DFE4,0xF233B22D,0xE548A66E,0xDCC59AAB,0xCBBE8EE8, + 0x3A3681EB,0x2D4D95A8,0x14C0A96D,0x03BBBD2E,0x67DAD0E7,0x70A1C4A4,0x492CF861,0x5E57EC22, + 0x81EE23F3,0x969537B0,0xAF180B75,0xB8631F36,0xDC0272FF,0xCB7966BC,0xF2F45A79,0xE58F4E3A, + 0x96F6C39A,0x818DD7D9,0xB800EB1C,0xAF7BFF5F,0xCB1A9296,0xDC6186D5,0xE5ECBA10,0xF297AE53, + 0x2D2E6182,0x3A5575C1,0x03D84904,0x14A35D47,0x70C2308E,0x67B924CD,0x5E341808,0x494F0C4B, + }, + + { + 0x00000000,0xEFC26B3E,0x04F5D03D,0xEB37BB03,0x09EBA07A,0xE629CB44,0x0D1E7047,0xE2DC1B79, + 0x13D740F4,0xFC152BCA,0x172290C9,0xF8E0FBF7,0x1A3CE08E,0xF5FE8BB0,0x1EC930B3,0xF10B5B8D, + 0x27AE81E8,0xC86CEAD6,0x235B51D5,0xCC993AEB,0x2E452192,0xC1874AAC,0x2AB0F1AF,0xC5729A91, + 0x3479C11C,0xDBBBAA22,0x308C1121,0xDF4E7A1F,0x3D926166,0xD2500A58,0x3967B15B,0xD6A5DA65, + 0x4F5D03D0,0xA09F68EE,0x4BA8D3ED,0xA46AB8D3,0x46B6A3AA,0xA974C894,0x42437397,0xAD8118A9, + 0x5C8A4324,0xB348281A,0x587F9319,0xB7BDF827,0x5561E35E,0xBAA38860,0x51943363,0xBE56585D, + 
0x68F38238,0x8731E906,0x6C065205,0x83C4393B,0x61182242,0x8EDA497C,0x65EDF27F,0x8A2F9941, + 0x7B24C2CC,0x94E6A9F2,0x7FD112F1,0x901379CF,0x72CF62B6,0x9D0D0988,0x763AB28B,0x99F8D9B5, + 0x9EBA07A0,0x71786C9E,0x9A4FD79D,0x758DBCA3,0x9751A7DA,0x7893CCE4,0x93A477E7,0x7C661CD9, + 0x8D6D4754,0x62AF2C6A,0x89989769,0x665AFC57,0x8486E72E,0x6B448C10,0x80733713,0x6FB15C2D, + 0xB9148648,0x56D6ED76,0xBDE15675,0x52233D4B,0xB0FF2632,0x5F3D4D0C,0xB40AF60F,0x5BC89D31, + 0xAAC3C6BC,0x4501AD82,0xAE361681,0x41F47DBF,0xA32866C6,0x4CEA0DF8,0xA7DDB6FB,0x481FDDC5, + 0xD1E70470,0x3E256F4E,0xD512D44D,0x3AD0BF73,0xD80CA40A,0x37CECF34,0xDCF97437,0x333B1F09, + 0xC2304484,0x2DF22FBA,0xC6C594B9,0x2907FF87,0xCBDBE4FE,0x24198FC0,0xCF2E34C3,0x20EC5FFD, + 0xF6498598,0x198BEEA6,0xF2BC55A5,0x1D7E3E9B,0xFFA225E2,0x10604EDC,0xFB57F5DF,0x14959EE1, + 0xE59EC56C,0x0A5CAE52,0xE16B1551,0x0EA97E6F,0xEC756516,0x03B70E28,0xE880B52B,0x0742DE15, + 0xE6050901,0x09C7623F,0xE2F0D93C,0x0D32B202,0xEFEEA97B,0x002CC245,0xEB1B7946,0x04D91278, + 0xF5D249F5,0x1A1022CB,0xF12799C8,0x1EE5F2F6,0xFC39E98F,0x13FB82B1,0xF8CC39B2,0x170E528C, + 0xC1AB88E9,0x2E69E3D7,0xC55E58D4,0x2A9C33EA,0xC8402893,0x278243AD,0xCCB5F8AE,0x23779390, + 0xD27CC81D,0x3DBEA323,0xD6891820,0x394B731E,0xDB976867,0x34550359,0xDF62B85A,0x30A0D364, + 0xA9580AD1,0x469A61EF,0xADADDAEC,0x426FB1D2,0xA0B3AAAB,0x4F71C195,0xA4467A96,0x4B8411A8, + 0xBA8F4A25,0x554D211B,0xBE7A9A18,0x51B8F126,0xB364EA5F,0x5CA68161,0xB7913A62,0x5853515C, + 0x8EF68B39,0x6134E007,0x8A035B04,0x65C1303A,0x871D2B43,0x68DF407D,0x83E8FB7E,0x6C2A9040, + 0x9D21CBCD,0x72E3A0F3,0x99D41BF0,0x761670CE,0x94CA6BB7,0x7B080089,0x903FBB8A,0x7FFDD0B4, + 0x78BF0EA1,0x977D659F,0x7C4ADE9C,0x9388B5A2,0x7154AEDB,0x9E96C5E5,0x75A17EE6,0x9A6315D8, + 0x6B684E55,0x84AA256B,0x6F9D9E68,0x805FF556,0x6283EE2F,0x8D418511,0x66763E12,0x89B4552C, + 0x5F118F49,0xB0D3E477,0x5BE45F74,0xB426344A,0x56FA2F33,0xB938440D,0x520FFF0E,0xBDCD9430, + 0x4CC6CFBD,0xA304A483,0x48331F80,0xA7F174BE,0x452D6FC7,0xAAEF04F9,0x41D8BFFA,0xAE1AD4C4, + 0x37E20D71,0xD820664F,0x3317DD4C,0xDCD5B672,0x3E09AD0B,0xD1CBC635,0x3AFC7D36,0xD53E1608, + 0x24354D85,0xCBF726BB,0x20C09DB8,0xCF02F686,0x2DDEEDFF,0xC21C86C1,0x292B3DC2,0xC6E956FC, + 0x104C8C99,0xFF8EE7A7,0x14B95CA4,0xFB7B379A,0x19A72CE3,0xF66547DD,0x1D52FCDE,0xF29097E0, + 0x039BCC6D,0xEC59A753,0x076E1C50,0xE8AC776E,0x0A706C17,0xE5B20729,0x0E85BC2A,0xE147D714, + }, + + { + 0x00000000,0xC18EDFC0,0x586CB9C1,0x99E26601,0xB0D97382,0x7157AC42,0xE8B5CA43,0x293B1583, + 0xBAC3E145,0x7B4D3E85,0xE2AF5884,0x23218744,0x0A1A92C7,0xCB944D07,0x52762B06,0x93F8F4C6, + 0xAEF6C4CB,0x6F781B0B,0xF69A7D0A,0x3714A2CA,0x1E2FB749,0xDFA16889,0x46430E88,0x87CDD148, + 0x1435258E,0xD5BBFA4E,0x4C599C4F,0x8DD7438F,0xA4EC560C,0x656289CC,0xFC80EFCD,0x3D0E300D, + 0x869C8FD7,0x47125017,0xDEF03616,0x1F7EE9D6,0x3645FC55,0xF7CB2395,0x6E294594,0xAFA79A54, + 0x3C5F6E92,0xFDD1B152,0x6433D753,0xA5BD0893,0x8C861D10,0x4D08C2D0,0xD4EAA4D1,0x15647B11, + 0x286A4B1C,0xE9E494DC,0x7006F2DD,0xB1882D1D,0x98B3389E,0x593DE75E,0xC0DF815F,0x01515E9F, + 0x92A9AA59,0x53277599,0xCAC51398,0x0B4BCC58,0x2270D9DB,0xE3FE061B,0x7A1C601A,0xBB92BFDA, + 0xD64819EF,0x17C6C62F,0x8E24A02E,0x4FAA7FEE,0x66916A6D,0xA71FB5AD,0x3EFDD3AC,0xFF730C6C, + 0x6C8BF8AA,0xAD05276A,0x34E7416B,0xF5699EAB,0xDC528B28,0x1DDC54E8,0x843E32E9,0x45B0ED29, + 0x78BEDD24,0xB93002E4,0x20D264E5,0xE15CBB25,0xC867AEA6,0x09E97166,0x900B1767,0x5185C8A7, + 0xC27D3C61,0x03F3E3A1,0x9A1185A0,0x5B9F5A60,0x72A44FE3,0xB32A9023,0x2AC8F622,0xEB4629E2, + 
0x50D49638,0x915A49F8,0x08B82FF9,0xC936F039,0xE00DE5BA,0x21833A7A,0xB8615C7B,0x79EF83BB, + 0xEA17777D,0x2B99A8BD,0xB27BCEBC,0x73F5117C,0x5ACE04FF,0x9B40DB3F,0x02A2BD3E,0xC32C62FE, + 0xFE2252F3,0x3FAC8D33,0xA64EEB32,0x67C034F2,0x4EFB2171,0x8F75FEB1,0x169798B0,0xD7194770, + 0x44E1B3B6,0x856F6C76,0x1C8D0A77,0xDD03D5B7,0xF438C034,0x35B61FF4,0xAC5479F5,0x6DDAA635, + 0x77E1359F,0xB66FEA5F,0x2F8D8C5E,0xEE03539E,0xC738461D,0x06B699DD,0x9F54FFDC,0x5EDA201C, + 0xCD22D4DA,0x0CAC0B1A,0x954E6D1B,0x54C0B2DB,0x7DFBA758,0xBC757898,0x25971E99,0xE419C159, + 0xD917F154,0x18992E94,0x817B4895,0x40F59755,0x69CE82D6,0xA8405D16,0x31A23B17,0xF02CE4D7, + 0x63D41011,0xA25ACFD1,0x3BB8A9D0,0xFA367610,0xD30D6393,0x1283BC53,0x8B61DA52,0x4AEF0592, + 0xF17DBA48,0x30F36588,0xA9110389,0x689FDC49,0x41A4C9CA,0x802A160A,0x19C8700B,0xD846AFCB, + 0x4BBE5B0D,0x8A3084CD,0x13D2E2CC,0xD25C3D0C,0xFB67288F,0x3AE9F74F,0xA30B914E,0x62854E8E, + 0x5F8B7E83,0x9E05A143,0x07E7C742,0xC6691882,0xEF520D01,0x2EDCD2C1,0xB73EB4C0,0x76B06B00, + 0xE5489FC6,0x24C64006,0xBD242607,0x7CAAF9C7,0x5591EC44,0x941F3384,0x0DFD5585,0xCC738A45, + 0xA1A92C70,0x6027F3B0,0xF9C595B1,0x384B4A71,0x11705FF2,0xD0FE8032,0x491CE633,0x889239F3, + 0x1B6ACD35,0xDAE412F5,0x430674F4,0x8288AB34,0xABB3BEB7,0x6A3D6177,0xF3DF0776,0x3251D8B6, + 0x0F5FE8BB,0xCED1377B,0x5733517A,0x96BD8EBA,0xBF869B39,0x7E0844F9,0xE7EA22F8,0x2664FD38, + 0xB59C09FE,0x7412D63E,0xEDF0B03F,0x2C7E6FFF,0x05457A7C,0xC4CBA5BC,0x5D29C3BD,0x9CA71C7D, + 0x2735A3A7,0xE6BB7C67,0x7F591A66,0xBED7C5A6,0x97ECD025,0x56620FE5,0xCF8069E4,0x0E0EB624, + 0x9DF642E2,0x5C789D22,0xC59AFB23,0x041424E3,0x2D2F3160,0xECA1EEA0,0x754388A1,0xB4CD5761, + 0x89C3676C,0x484DB8AC,0xD1AFDEAD,0x1021016D,0x391A14EE,0xF894CB2E,0x6176AD2F,0xA0F872EF, + 0x33008629,0xF28E59E9,0x6B6C3FE8,0xAAE2E028,0x83D9F5AB,0x42572A6B,0xDBB54C6A,0x1A3B93AA, + }, + + { + 0x00000000,0x9BA54C6F,0xEC3B9E9F,0x779ED2F0,0x03063B7F,0x98A37710,0xEF3DA5E0,0x7498E98F, + 0x060C76FE,0x9DA93A91,0xEA37E861,0x7192A40E,0x050A4D81,0x9EAF01EE,0xE931D31E,0x72949F71, + 0x0C18EDFC,0x97BDA193,0xE0237363,0x7B863F0C,0x0F1ED683,0x94BB9AEC,0xE325481C,0x78800473, + 0x0A149B02,0x91B1D76D,0xE62F059D,0x7D8A49F2,0x0912A07D,0x92B7EC12,0xE5293EE2,0x7E8C728D, + 0x1831DBF8,0x83949797,0xF40A4567,0x6FAF0908,0x1B37E087,0x8092ACE8,0xF70C7E18,0x6CA93277, + 0x1E3DAD06,0x8598E169,0xF2063399,0x69A37FF6,0x1D3B9679,0x869EDA16,0xF10008E6,0x6AA54489, + 0x14293604,0x8F8C7A6B,0xF812A89B,0x63B7E4F4,0x172F0D7B,0x8C8A4114,0xFB1493E4,0x60B1DF8B, + 0x122540FA,0x89800C95,0xFE1EDE65,0x65BB920A,0x11237B85,0x8A8637EA,0xFD18E51A,0x66BDA975, + 0x3063B7F0,0xABC6FB9F,0xDC58296F,0x47FD6500,0x33658C8F,0xA8C0C0E0,0xDF5E1210,0x44FB5E7F, + 0x366FC10E,0xADCA8D61,0xDA545F91,0x41F113FE,0x3569FA71,0xAECCB61E,0xD95264EE,0x42F72881, + 0x3C7B5A0C,0xA7DE1663,0xD040C493,0x4BE588FC,0x3F7D6173,0xA4D82D1C,0xD346FFEC,0x48E3B383, + 0x3A772CF2,0xA1D2609D,0xD64CB26D,0x4DE9FE02,0x3971178D,0xA2D45BE2,0xD54A8912,0x4EEFC57D, + 0x28526C08,0xB3F72067,0xC469F297,0x5FCCBEF8,0x2B545777,0xB0F11B18,0xC76FC9E8,0x5CCA8587, + 0x2E5E1AF6,0xB5FB5699,0xC2658469,0x59C0C806,0x2D582189,0xB6FD6DE6,0xC163BF16,0x5AC6F379, + 0x244A81F4,0xBFEFCD9B,0xC8711F6B,0x53D45304,0x274CBA8B,0xBCE9F6E4,0xCB772414,0x50D2687B, + 0x2246F70A,0xB9E3BB65,0xCE7D6995,0x55D825FA,0x2140CC75,0xBAE5801A,0xCD7B52EA,0x56DE1E85, + 0x60C76FE0,0xFB62238F,0x8CFCF17F,0x1759BD10,0x63C1549F,0xF86418F0,0x8FFACA00,0x145F866F, + 0x66CB191E,0xFD6E5571,0x8AF08781,0x1155CBEE,0x65CD2261,0xFE686E0E,0x89F6BCFE,0x1253F091, + 
0x6CDF821C,0xF77ACE73,0x80E41C83,0x1B4150EC,0x6FD9B963,0xF47CF50C,0x83E227FC,0x18476B93, + 0x6AD3F4E2,0xF176B88D,0x86E86A7D,0x1D4D2612,0x69D5CF9D,0xF27083F2,0x85EE5102,0x1E4B1D6D, + 0x78F6B418,0xE353F877,0x94CD2A87,0x0F6866E8,0x7BF08F67,0xE055C308,0x97CB11F8,0x0C6E5D97, + 0x7EFAC2E6,0xE55F8E89,0x92C15C79,0x09641016,0x7DFCF999,0xE659B5F6,0x91C76706,0x0A622B69, + 0x74EE59E4,0xEF4B158B,0x98D5C77B,0x03708B14,0x77E8629B,0xEC4D2EF4,0x9BD3FC04,0x0076B06B, + 0x72E22F1A,0xE9476375,0x9ED9B185,0x057CFDEA,0x71E41465,0xEA41580A,0x9DDF8AFA,0x067AC695, + 0x50A4D810,0xCB01947F,0xBC9F468F,0x273A0AE0,0x53A2E36F,0xC807AF00,0xBF997DF0,0x243C319F, + 0x56A8AEEE,0xCD0DE281,0xBA933071,0x21367C1E,0x55AE9591,0xCE0BD9FE,0xB9950B0E,0x22304761, + 0x5CBC35EC,0xC7197983,0xB087AB73,0x2B22E71C,0x5FBA0E93,0xC41F42FC,0xB381900C,0x2824DC63, + 0x5AB04312,0xC1150F7D,0xB68BDD8D,0x2D2E91E2,0x59B6786D,0xC2133402,0xB58DE6F2,0x2E28AA9D, + 0x489503E8,0xD3304F87,0xA4AE9D77,0x3F0BD118,0x4B933897,0xD03674F8,0xA7A8A608,0x3C0DEA67, + 0x4E997516,0xD53C3979,0xA2A2EB89,0x3907A7E6,0x4D9F4E69,0xD63A0206,0xA1A4D0F6,0x3A019C99, + 0x448DEE14,0xDF28A27B,0xA8B6708B,0x33133CE4,0x478BD56B,0xDC2E9904,0xABB04BF4,0x3015079B, + 0x428198EA,0xD924D485,0xAEBA0675,0x351F4A1A,0x4187A395,0xDA22EFFA,0xADBC3D0A,0x36197165, + }, + + { + 0x00000000,0xDD96D985,0x605CB54B,0xBDCA6CCE,0xC0B96A96,0x1D2FB313,0xA0E5DFDD,0x7D730658, + 0x5A03D36D,0x87950AE8,0x3A5F6626,0xE7C9BFA3,0x9ABAB9FB,0x472C607E,0xFAE60CB0,0x2770D535, + 0xB407A6DA,0x69917F5F,0xD45B1391,0x09CDCA14,0x74BECC4C,0xA92815C9,0x14E27907,0xC974A082, + 0xEE0475B7,0x3392AC32,0x8E58C0FC,0x53CE1979,0x2EBD1F21,0xF32BC6A4,0x4EE1AA6A,0x937773EF, + 0xB37E4BF5,0x6EE89270,0xD322FEBE,0x0EB4273B,0x73C72163,0xAE51F8E6,0x139B9428,0xCE0D4DAD, + 0xE97D9898,0x34EB411D,0x89212DD3,0x54B7F456,0x29C4F20E,0xF4522B8B,0x49984745,0x940E9EC0, + 0x0779ED2F,0xDAEF34AA,0x67255864,0xBAB381E1,0xC7C087B9,0x1A565E3C,0xA79C32F2,0x7A0AEB77, + 0x5D7A3E42,0x80ECE7C7,0x3D268B09,0xE0B0528C,0x9DC354D4,0x40558D51,0xFD9FE19F,0x2009381A, + 0xBD8D91AB,0x601B482E,0xDDD124E0,0x0047FD65,0x7D34FB3D,0xA0A222B8,0x1D684E76,0xC0FE97F3, + 0xE78E42C6,0x3A189B43,0x87D2F78D,0x5A442E08,0x27372850,0xFAA1F1D5,0x476B9D1B,0x9AFD449E, + 0x098A3771,0xD41CEEF4,0x69D6823A,0xB4405BBF,0xC9335DE7,0x14A58462,0xA96FE8AC,0x74F93129, + 0x5389E41C,0x8E1F3D99,0x33D55157,0xEE4388D2,0x93308E8A,0x4EA6570F,0xF36C3BC1,0x2EFAE244, + 0x0EF3DA5E,0xD36503DB,0x6EAF6F15,0xB339B690,0xCE4AB0C8,0x13DC694D,0xAE160583,0x7380DC06, + 0x54F00933,0x8966D0B6,0x34ACBC78,0xE93A65FD,0x944963A5,0x49DFBA20,0xF415D6EE,0x29830F6B, + 0xBAF47C84,0x6762A501,0xDAA8C9CF,0x073E104A,0x7A4D1612,0xA7DBCF97,0x1A11A359,0xC7877ADC, + 0xE0F7AFE9,0x3D61766C,0x80AB1AA2,0x5D3DC327,0x204EC57F,0xFDD81CFA,0x40127034,0x9D84A9B1, + 0xA06A2517,0x7DFCFC92,0xC036905C,0x1DA049D9,0x60D34F81,0xBD459604,0x008FFACA,0xDD19234F, + 0xFA69F67A,0x27FF2FFF,0x9A354331,0x47A39AB4,0x3AD09CEC,0xE7464569,0x5A8C29A7,0x871AF022, + 0x146D83CD,0xC9FB5A48,0x74313686,0xA9A7EF03,0xD4D4E95B,0x094230DE,0xB4885C10,0x691E8595, + 0x4E6E50A0,0x93F88925,0x2E32E5EB,0xF3A43C6E,0x8ED73A36,0x5341E3B3,0xEE8B8F7D,0x331D56F8, + 0x13146EE2,0xCE82B767,0x7348DBA9,0xAEDE022C,0xD3AD0474,0x0E3BDDF1,0xB3F1B13F,0x6E6768BA, + 0x4917BD8F,0x9481640A,0x294B08C4,0xF4DDD141,0x89AED719,0x54380E9C,0xE9F26252,0x3464BBD7, + 0xA713C838,0x7A8511BD,0xC74F7D73,0x1AD9A4F6,0x67AAA2AE,0xBA3C7B2B,0x07F617E5,0xDA60CE60, + 0xFD101B55,0x2086C2D0,0x9D4CAE1E,0x40DA779B,0x3DA971C3,0xE03FA846,0x5DF5C488,0x80631D0D, + 
0x1DE7B4BC,0xC0716D39,0x7DBB01F7,0xA02DD872,0xDD5EDE2A,0x00C807AF,0xBD026B61,0x6094B2E4, + 0x47E467D1,0x9A72BE54,0x27B8D29A,0xFA2E0B1F,0x875D0D47,0x5ACBD4C2,0xE701B80C,0x3A976189, + 0xA9E01266,0x7476CBE3,0xC9BCA72D,0x142A7EA8,0x695978F0,0xB4CFA175,0x0905CDBB,0xD493143E, + 0xF3E3C10B,0x2E75188E,0x93BF7440,0x4E29ADC5,0x335AAB9D,0xEECC7218,0x53061ED6,0x8E90C753, + 0xAE99FF49,0x730F26CC,0xCEC54A02,0x13539387,0x6E2095DF,0xB3B64C5A,0x0E7C2094,0xD3EAF911, + 0xF49A2C24,0x290CF5A1,0x94C6996F,0x495040EA,0x342346B2,0xE9B59F37,0x547FF3F9,0x89E92A7C, + 0x1A9E5993,0xC7088016,0x7AC2ECD8,0xA754355D,0xDA273305,0x07B1EA80,0xBA7B864E,0x67ED5FCB, + 0x409D8AFE,0x9D0B537B,0x20C13FB5,0xFD57E630,0x8024E068,0x5DB239ED,0xE0785523,0x3DEE8CA6, + }, + + { + 0x00000000,0x9D0FE176,0xE16EC4AD,0x7C6125DB,0x19AC8F1B,0x84A36E6D,0xF8C24BB6,0x65CDAAC0, + 0x33591E36,0xAE56FF40,0xD237DA9B,0x4F383BED,0x2AF5912D,0xB7FA705B,0xCB9B5580,0x5694B4F6, + 0x66B23C6C,0xFBBDDD1A,0x87DCF8C1,0x1AD319B7,0x7F1EB377,0xE2115201,0x9E7077DA,0x037F96AC, + 0x55EB225A,0xC8E4C32C,0xB485E6F7,0x298A0781,0x4C47AD41,0xD1484C37,0xAD2969EC,0x3026889A, + 0xCD6478D8,0x506B99AE,0x2C0ABC75,0xB1055D03,0xD4C8F7C3,0x49C716B5,0x35A6336E,0xA8A9D218, + 0xFE3D66EE,0x63328798,0x1F53A243,0x825C4335,0xE791E9F5,0x7A9E0883,0x06FF2D58,0x9BF0CC2E, + 0xABD644B4,0x36D9A5C2,0x4AB88019,0xD7B7616F,0xB27ACBAF,0x2F752AD9,0x53140F02,0xCE1BEE74, + 0x988F5A82,0x0580BBF4,0x79E19E2F,0xE4EE7F59,0x8123D599,0x1C2C34EF,0x604D1134,0xFD42F042, + 0x41B9F7F1,0xDCB61687,0xA0D7335C,0x3DD8D22A,0x581578EA,0xC51A999C,0xB97BBC47,0x24745D31, + 0x72E0E9C7,0xEFEF08B1,0x938E2D6A,0x0E81CC1C,0x6B4C66DC,0xF64387AA,0x8A22A271,0x172D4307, + 0x270BCB9D,0xBA042AEB,0xC6650F30,0x5B6AEE46,0x3EA74486,0xA3A8A5F0,0xDFC9802B,0x42C6615D, + 0x1452D5AB,0x895D34DD,0xF53C1106,0x6833F070,0x0DFE5AB0,0x90F1BBC6,0xEC909E1D,0x719F7F6B, + 0x8CDD8F29,0x11D26E5F,0x6DB34B84,0xF0BCAAF2,0x95710032,0x087EE144,0x741FC49F,0xE91025E9, + 0xBF84911F,0x228B7069,0x5EEA55B2,0xC3E5B4C4,0xA6281E04,0x3B27FF72,0x4746DAA9,0xDA493BDF, + 0xEA6FB345,0x77605233,0x0B0177E8,0x960E969E,0xF3C33C5E,0x6ECCDD28,0x12ADF8F3,0x8FA21985, + 0xD936AD73,0x44394C05,0x385869DE,0xA55788A8,0xC09A2268,0x5D95C31E,0x21F4E6C5,0xBCFB07B3, + 0x8373EFE2,0x1E7C0E94,0x621D2B4F,0xFF12CA39,0x9ADF60F9,0x07D0818F,0x7BB1A454,0xE6BE4522, + 0xB02AF1D4,0x2D2510A2,0x51443579,0xCC4BD40F,0xA9867ECF,0x34899FB9,0x48E8BA62,0xD5E75B14, + 0xE5C1D38E,0x78CE32F8,0x04AF1723,0x99A0F655,0xFC6D5C95,0x6162BDE3,0x1D039838,0x800C794E, + 0xD698CDB8,0x4B972CCE,0x37F60915,0xAAF9E863,0xCF3442A3,0x523BA3D5,0x2E5A860E,0xB3556778, + 0x4E17973A,0xD318764C,0xAF795397,0x3276B2E1,0x57BB1821,0xCAB4F957,0xB6D5DC8C,0x2BDA3DFA, + 0x7D4E890C,0xE041687A,0x9C204DA1,0x012FACD7,0x64E20617,0xF9EDE761,0x858CC2BA,0x188323CC, + 0x28A5AB56,0xB5AA4A20,0xC9CB6FFB,0x54C48E8D,0x3109244D,0xAC06C53B,0xD067E0E0,0x4D680196, + 0x1BFCB560,0x86F35416,0xFA9271CD,0x679D90BB,0x02503A7B,0x9F5FDB0D,0xE33EFED6,0x7E311FA0, + 0xC2CA1813,0x5FC5F965,0x23A4DCBE,0xBEAB3DC8,0xDB669708,0x4669767E,0x3A0853A5,0xA707B2D3, + 0xF1930625,0x6C9CE753,0x10FDC288,0x8DF223FE,0xE83F893E,0x75306848,0x09514D93,0x945EACE5, + 0xA478247F,0x3977C509,0x4516E0D2,0xD81901A4,0xBDD4AB64,0x20DB4A12,0x5CBA6FC9,0xC1B58EBF, + 0x97213A49,0x0A2EDB3F,0x764FFEE4,0xEB401F92,0x8E8DB552,0x13825424,0x6FE371FF,0xF2EC9089, + 0x0FAE60CB,0x92A181BD,0xEEC0A466,0x73CF4510,0x1602EFD0,0x8B0D0EA6,0xF76C2B7D,0x6A63CA0B, + 0x3CF77EFD,0xA1F89F8B,0xDD99BA50,0x40965B26,0x255BF1E6,0xB8541090,0xC435354B,0x593AD43D, + 
0x691C5CA7,0xF413BDD1,0x8872980A,0x157D797C,0x70B0D3BC,0xEDBF32CA,0x91DE1711,0x0CD1F667, + 0x5A454291,0xC74AA3E7,0xBB2B863C,0x2624674A,0x43E9CD8A,0xDEE62CFC,0xA2870927,0x3F88E851, + }, + + { + 0x00000000,0xB9FBDBE8,0xA886B191,0x117D6A79,0x8A7C6563,0x3387BE8B,0x22FAD4F2,0x9B010F1A, + 0xCF89CC87,0x7672176F,0x670F7D16,0xDEF4A6FE,0x45F5A9E4,0xFC0E720C,0xED731875,0x5488C39D, + 0x44629F4F,0xFD9944A7,0xECE42EDE,0x551FF536,0xCE1EFA2C,0x77E521C4,0x66984BBD,0xDF639055, + 0x8BEB53C8,0x32108820,0x236DE259,0x9A9639B1,0x019736AB,0xB86CED43,0xA911873A,0x10EA5CD2, + 0x88C53E9E,0x313EE576,0x20438F0F,0x99B854E7,0x02B95BFD,0xBB428015,0xAA3FEA6C,0x13C43184, + 0x474CF219,0xFEB729F1,0xEFCA4388,0x56319860,0xCD30977A,0x74CB4C92,0x65B626EB,0xDC4DFD03, + 0xCCA7A1D1,0x755C7A39,0x64211040,0xDDDACBA8,0x46DBC4B2,0xFF201F5A,0xEE5D7523,0x57A6AECB, + 0x032E6D56,0xBAD5B6BE,0xABA8DCC7,0x1253072F,0x89520835,0x30A9D3DD,0x21D4B9A4,0x982F624C, + 0xCAFB7B7D,0x7300A095,0x627DCAEC,0xDB861104,0x40871E1E,0xF97CC5F6,0xE801AF8F,0x51FA7467, + 0x0572B7FA,0xBC896C12,0xADF4066B,0x140FDD83,0x8F0ED299,0x36F50971,0x27886308,0x9E73B8E0, + 0x8E99E432,0x37623FDA,0x261F55A3,0x9FE48E4B,0x04E58151,0xBD1E5AB9,0xAC6330C0,0x1598EB28, + 0x411028B5,0xF8EBF35D,0xE9969924,0x506D42CC,0xCB6C4DD6,0x7297963E,0x63EAFC47,0xDA1127AF, + 0x423E45E3,0xFBC59E0B,0xEAB8F472,0x53432F9A,0xC8422080,0x71B9FB68,0x60C49111,0xD93F4AF9, + 0x8DB78964,0x344C528C,0x253138F5,0x9CCAE31D,0x07CBEC07,0xBE3037EF,0xAF4D5D96,0x16B6867E, + 0x065CDAAC,0xBFA70144,0xAEDA6B3D,0x1721B0D5,0x8C20BFCF,0x35DB6427,0x24A60E5E,0x9D5DD5B6, + 0xC9D5162B,0x702ECDC3,0x6153A7BA,0xD8A87C52,0x43A97348,0xFA52A8A0,0xEB2FC2D9,0x52D41931, + 0x4E87F0BB,0xF77C2B53,0xE601412A,0x5FFA9AC2,0xC4FB95D8,0x7D004E30,0x6C7D2449,0xD586FFA1, + 0x810E3C3C,0x38F5E7D4,0x29888DAD,0x90735645,0x0B72595F,0xB28982B7,0xA3F4E8CE,0x1A0F3326, + 0x0AE56FF4,0xB31EB41C,0xA263DE65,0x1B98058D,0x80990A97,0x3962D17F,0x281FBB06,0x91E460EE, + 0xC56CA373,0x7C97789B,0x6DEA12E2,0xD411C90A,0x4F10C610,0xF6EB1DF8,0xE7967781,0x5E6DAC69, + 0xC642CE25,0x7FB915CD,0x6EC47FB4,0xD73FA45C,0x4C3EAB46,0xF5C570AE,0xE4B81AD7,0x5D43C13F, + 0x09CB02A2,0xB030D94A,0xA14DB333,0x18B668DB,0x83B767C1,0x3A4CBC29,0x2B31D650,0x92CA0DB8, + 0x8220516A,0x3BDB8A82,0x2AA6E0FB,0x935D3B13,0x085C3409,0xB1A7EFE1,0xA0DA8598,0x19215E70, + 0x4DA99DED,0xF4524605,0xE52F2C7C,0x5CD4F794,0xC7D5F88E,0x7E2E2366,0x6F53491F,0xD6A892F7, + 0x847C8BC6,0x3D87502E,0x2CFA3A57,0x9501E1BF,0x0E00EEA5,0xB7FB354D,0xA6865F34,0x1F7D84DC, + 0x4BF54741,0xF20E9CA9,0xE373F6D0,0x5A882D38,0xC1892222,0x7872F9CA,0x690F93B3,0xD0F4485B, + 0xC01E1489,0x79E5CF61,0x6898A518,0xD1637EF0,0x4A6271EA,0xF399AA02,0xE2E4C07B,0x5B1F1B93, + 0x0F97D80E,0xB66C03E6,0xA711699F,0x1EEAB277,0x85EBBD6D,0x3C106685,0x2D6D0CFC,0x9496D714, + 0x0CB9B558,0xB5426EB0,0xA43F04C9,0x1DC4DF21,0x86C5D03B,0x3F3E0BD3,0x2E4361AA,0x97B8BA42, + 0xC33079DF,0x7ACBA237,0x6BB6C84E,0xD24D13A6,0x494C1CBC,0xF0B7C754,0xE1CAAD2D,0x583176C5, + 0x48DB2A17,0xF120F1FF,0xE05D9B86,0x59A6406E,0xC2A74F74,0x7B5C949C,0x6A21FEE5,0xD3DA250D, + 0x8752E690,0x3EA93D78,0x2FD45701,0x962F8CE9,0x0D2E83F3,0xB4D5581B,0xA5A83262,0x1C53E98A, + }, + + { + 0x00000000,0xAE689191,0x87A02563,0x29C8B4F2,0xD4314C87,0x7A59DD16,0x539169E4,0xFDF9F875, + 0x73139F4F,0xDD7B0EDE,0xF4B3BA2C,0x5ADB2BBD,0xA722D3C8,0x094A4259,0x2082F6AB,0x8EEA673A, + 0xE6273E9E,0x484FAF0F,0x61871BFD,0xCFEF8A6C,0x32167219,0x9C7EE388,0xB5B6577A,0x1BDEC6EB, + 0x9534A1D1,0x3B5C3040,0x129484B2,0xBCFC1523,0x4105ED56,0xEF6D7CC7,0xC6A5C835,0x68CD59A4, + 
0x173F7B7D,0xB957EAEC,0x909F5E1E,0x3EF7CF8F,0xC30E37FA,0x6D66A66B,0x44AE1299,0xEAC68308, + 0x642CE432,0xCA4475A3,0xE38CC151,0x4DE450C0,0xB01DA8B5,0x1E753924,0x37BD8DD6,0x99D51C47, + 0xF11845E3,0x5F70D472,0x76B86080,0xD8D0F111,0x25290964,0x8B4198F5,0xA2892C07,0x0CE1BD96, + 0x820BDAAC,0x2C634B3D,0x05ABFFCF,0xABC36E5E,0x563A962B,0xF85207BA,0xD19AB348,0x7FF222D9, + 0x2E7EF6FA,0x8016676B,0xA9DED399,0x07B64208,0xFA4FBA7D,0x54272BEC,0x7DEF9F1E,0xD3870E8F, + 0x5D6D69B5,0xF305F824,0xDACD4CD6,0x74A5DD47,0x895C2532,0x2734B4A3,0x0EFC0051,0xA09491C0, + 0xC859C864,0x663159F5,0x4FF9ED07,0xE1917C96,0x1C6884E3,0xB2001572,0x9BC8A180,0x35A03011, + 0xBB4A572B,0x1522C6BA,0x3CEA7248,0x9282E3D9,0x6F7B1BAC,0xC1138A3D,0xE8DB3ECF,0x46B3AF5E, + 0x39418D87,0x97291C16,0xBEE1A8E4,0x10893975,0xED70C100,0x43185091,0x6AD0E463,0xC4B875F2, + 0x4A5212C8,0xE43A8359,0xCDF237AB,0x639AA63A,0x9E635E4F,0x300BCFDE,0x19C37B2C,0xB7ABEABD, + 0xDF66B319,0x710E2288,0x58C6967A,0xF6AE07EB,0x0B57FF9E,0xA53F6E0F,0x8CF7DAFD,0x229F4B6C, + 0xAC752C56,0x021DBDC7,0x2BD50935,0x85BD98A4,0x784460D1,0xD62CF140,0xFFE445B2,0x518CD423, + 0x5CFDEDF4,0xF2957C65,0xDB5DC897,0x75355906,0x88CCA173,0x26A430E2,0x0F6C8410,0xA1041581, + 0x2FEE72BB,0x8186E32A,0xA84E57D8,0x0626C649,0xFBDF3E3C,0x55B7AFAD,0x7C7F1B5F,0xD2178ACE, + 0xBADAD36A,0x14B242FB,0x3D7AF609,0x93126798,0x6EEB9FED,0xC0830E7C,0xE94BBA8E,0x47232B1F, + 0xC9C94C25,0x67A1DDB4,0x4E696946,0xE001F8D7,0x1DF800A2,0xB3909133,0x9A5825C1,0x3430B450, + 0x4BC29689,0xE5AA0718,0xCC62B3EA,0x620A227B,0x9FF3DA0E,0x319B4B9F,0x1853FF6D,0xB63B6EFC, + 0x38D109C6,0x96B99857,0xBF712CA5,0x1119BD34,0xECE04541,0x4288D4D0,0x6B406022,0xC528F1B3, + 0xADE5A817,0x038D3986,0x2A458D74,0x842D1CE5,0x79D4E490,0xD7BC7501,0xFE74C1F3,0x501C5062, + 0xDEF63758,0x709EA6C9,0x5956123B,0xF73E83AA,0x0AC77BDF,0xA4AFEA4E,0x8D675EBC,0x230FCF2D, + 0x72831B0E,0xDCEB8A9F,0xF5233E6D,0x5B4BAFFC,0xA6B25789,0x08DAC618,0x211272EA,0x8F7AE37B, + 0x01908441,0xAFF815D0,0x8630A122,0x285830B3,0xD5A1C8C6,0x7BC95957,0x5201EDA5,0xFC697C34, + 0x94A42590,0x3ACCB401,0x130400F3,0xBD6C9162,0x40956917,0xEEFDF886,0xC7354C74,0x695DDDE5, + 0xE7B7BADF,0x49DF2B4E,0x60179FBC,0xCE7F0E2D,0x3386F658,0x9DEE67C9,0xB426D33B,0x1A4E42AA, + 0x65BC6073,0xCBD4F1E2,0xE21C4510,0x4C74D481,0xB18D2CF4,0x1FE5BD65,0x362D0997,0x98459806, + 0x16AFFF3C,0xB8C76EAD,0x910FDA5F,0x3F674BCE,0xC29EB3BB,0x6CF6222A,0x453E96D8,0xEB560749, + 0x839B5EED,0x2DF3CF7C,0x043B7B8E,0xAA53EA1F,0x57AA126A,0xF9C283FB,0xD00A3709,0x7E62A698, + 0xF088C1A2,0x5EE05033,0x7728E4C1,0xD9407550,0x24B98D25,0x8AD11CB4,0xA319A846,0x0D7139D7, + } +#endif // CRC32_USE_LOOKUP_TABLE_SLICING_BY_16 +}; +#endif diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/file_adapter.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/file_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..36bc5de2710034ccaa4ccc9468520dab31cc9786 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/file_adapter.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +#include "caffe2/serialize/istream_adapter.h" +#include "caffe2/serialize/read_adapter_interface.h" + +namespace caffe2 { +namespace serialize { + +class TORCH_API FileAdapter final : public ReadAdapterInterface { + public: + C10_DISABLE_COPY_AND_ASSIGN(FileAdapter); + explicit FileAdapter(const std::string& file_name); + size_t size() const override; + size_t read(uint64_t pos, void* buf, size_t n, const char* what = "") + const override; + ~FileAdapter() override; + + private: + // 
An RAII Wrapper for a FILE pointer. Closes on destruction. + struct RAIIFile { + FILE* fp_; + explicit RAIIFile(const std::string& file_name); + ~RAIIFile(); + }; + + RAIIFile file_; + // The size of the opened file in bytes + uint64_t size_; +}; + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/in_memory_adapter.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/in_memory_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..817f3f5d677493aa3a1e739df8c3b5dc8a8219ef --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/in_memory_adapter.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include + + +namespace caffe2 { +namespace serialize { + +class MemoryReadAdapter final : public caffe2::serialize::ReadAdapterInterface { + public: + explicit MemoryReadAdapter(const void* data, off_t size) + : data_(data), size_(size) {} + + size_t size() const override { + return size_; + } + + size_t read(uint64_t pos, void* buf, size_t n, const char* what = "") + const override { + (void) what; + memcpy(buf, (int8_t*)(data_) + pos, n); + return n; + } + + private: + const void* data_; + off_t size_; +}; + + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/inline_container.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/inline_container.h new file mode 100644 index 0000000000000000000000000000000000000000..6a13d414feb9e6282ec797bd2e11182dc584b629 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/inline_container.h @@ -0,0 +1,278 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "caffe2/serialize/istream_adapter.h" +#include "caffe2/serialize/read_adapter_interface.h" +#include "caffe2/serialize/versions.h" + + +extern "C" { +typedef struct mz_zip_archive mz_zip_archive; +} + +// PyTorch containers are a special zip archive with the following layout +// archive_name.zip contains: +// archive_name/ +// version # a file with a single decimal number written in ascii, +// # used to establish the version of the archive format +// model.json # overall model description, this is a json output of +// # ModelDef from torch.proto +// # the following names are by convention only, model.json will +// # refer to these files by full names +// tensors/ +// 0 # flat storage for tensor data, meta-data about shapes, etc. is +// # in model.json +// 1 +// ... +// # code entries will only exist for modules that have methods attached +// code/ +// archive_name.py # serialized torch script code (python syntax, using +// PythonPrint) archive_name_my_submodule.py # submodules have separate +// files +// +// The PyTorchStreamWriter also ensures additional useful properties for these +// files +// 1. All files are stored uncompressed. +// 2. All files in the archive are aligned to 64 byte boundaries such that +// it is possible to mmap the entire file and get an aligned pointer to +// tensor data. +// 3. We universally write in ZIP64 format for consistency. + +// The PyTorchStreamReader also provides additional properties: +// 1. It can read zip files that are created with common +// zip tools. This means that even though our writer doesn't compress files, +// the reader can still read files that were compressed. +// 2. 
It provides a getRecordOffset function which returns the offset into the +// raw file where file data lives. If the file was written with +// PyTorchStreamWriter it is guaranteed to be 64 byte aligned. + +// PyTorchReader/Writer handle checking the version number on the archive format +// and ensure that all files are written to a archive_name directory so they +// unzip cleanly. + +// When developing this format we want to pay particular attention to the +// following use cases: +// +// -- Reading -- +// 1) Reading with full random access +// a) Reading with file api's such as fread() +// b) mmaping the file and jumping around the mapped region +// 2) Reading with 1-pass sequential access +// -> A reader will need to build up a data structure of parsed structures +// as it reads +// +// -- Writing -- +// 1) Writing with full random access +// 2) Writing with 1-pass sequential access +// -> We must take care not to require updating values that have already +// been written. We place the variable-length index at the end and do +// not put any indicies into the header to fulfill this constraint. + +// The model.json, which contains all the metadata information, +// should be written as the last file. One reason is that the size of tensor +// data is usually stable. As long as the shape and type of the tensor do not +// change, the size of the data won't change. On the other sied, the size of the +// serialized model is likely to change, so we store it as the last record, and +// we don't need to move previous records when updating the model data. + +// The zip format is sufficiently flexible to handle the above use-case. +// it puts its central directory at the end of the archive and we write +// model.json as the last file when writing after we have accumulated all +// other information. + +namespace caffe2 { +namespace serialize { + +static constexpr const char* kSerializationIdRecordName = ".data/serialization_id"; + +struct MzZipReaderIterWrapper; + +class TORCH_API ChunkRecordIterator { + public: + ~ChunkRecordIterator(); + + // Read at most `chunkSize` into `buf`. Return the number of actual bytes read. + size_t next(void* buf); + size_t recordSize() const { return recordSize_; } + + private: + ChunkRecordIterator( + size_t recordSize, + size_t chunkSize, + std::unique_ptr iter); + + const size_t recordSize_; + const size_t chunkSize_; + size_t offset_; + std::unique_ptr iter_; + + friend class PyTorchStreamReader; +}; + +class TORCH_API PyTorchStreamReader final { + public: + explicit PyTorchStreamReader(const std::string& file_name); + explicit PyTorchStreamReader(std::istream* in); + explicit PyTorchStreamReader(std::shared_ptr in); + + // return dataptr, size + std::tuple getRecord(const std::string& name); + // multi-thread getRecord + std::tuple getRecord(const std::string& name, std::vector>& additionalReaders); + // inplace memory writing + size_t getRecord(const std::string& name, void* dst, size_t n); + // inplace memory writing, multi-threads. + // When additionalReaders is empty, the default behavior is call getRecord(name, dst, n) with default reader + // This approach can be used for reading large tensors. + size_t getRecord(const std::string& name, void* dst, size_t n, + std::vector>& additionalReaders); + size_t getRecord( + const std::string& name, + void* dst, + size_t n, + size_t chunk_size, + void* buf, + const std::function& memcpy_func = nullptr); + + // Concurrent reading records with multiple readers. 
+ // additionalReaders are additional clients to access the underlying record at different offsets + // and write to different trunks of buffers. + // If the overall size of the tensor is 10, and size of additionalReader is 2. + // The default thread will read [0,4), the additional reader will read [4,8). + // The default reader will read [8,10). + // The default reader will write to buffer[0,4), the additional reader will write to buffer[4,8), + // the additional reader will write to buffer[8,10). + // When additionalReaders is empty, the default behavior is call getRecord(name) with default reader + // This approach can be used for reading large tensors. + size_t getRecordMultiReaders(const std::string& name, + std::vector>& additionalReaders, + void *dst, size_t n); + + size_t getRecordSize(const std::string& name); + + size_t getRecordOffset(const std::string& name); + bool hasRecord(const std::string& name); + std::vector getAllRecords(); + + ChunkRecordIterator createChunkReaderIter( + const std::string& name, + const size_t recordSize, + const size_t chunkSize); + + ~PyTorchStreamReader(); + uint64_t version() const { + return version_; + } + const std::string& serializationId() { + return serialization_id_; + } + + void setShouldLoadDebugSymbol(bool should_load_debug_symbol) { + load_debug_symbol_ = should_load_debug_symbol; + } + void setAdditionalReaderSizeThreshold(const size_t& size){ + additional_reader_size_threshold_ = size; + } + private: + void init(); + size_t read(uint64_t pos, char* buf, size_t n); + void valid(const char* what, const char* info = ""); + size_t getRecordID(const std::string& name); + + friend size_t + istream_read_func(void* pOpaque, uint64_t file_ofs, void* pBuf, size_t n); + std::unique_ptr ar_; + std::string archive_name_; + std::string archive_name_plus_slash_; + std::shared_ptr in_; + int64_t version_; + std::mutex reader_lock_; + bool load_debug_symbol_ = true; + std::string serialization_id_; + size_t additional_reader_size_threshold_; +}; + +class TORCH_API PyTorchStreamWriter final { + public: + explicit PyTorchStreamWriter(const std::string& archive_name); + explicit PyTorchStreamWriter( + const std::function writer_func); + + void setMinVersion(const uint64_t version); + + void writeRecord( + const std::string& name, + const void* data, + size_t size, + bool compress = false); + void writeEndOfFile(); + + const std::unordered_set& getAllWrittenRecords(); + + bool finalized() const { + return finalized_; + } + + const std::string& archiveName() { + return archive_name_; + } + + const std::string& serializationId() { + return serialization_id_; + } + + ~PyTorchStreamWriter(); + + private: + void setup(const std::string& file_name); + void valid(const char* what, const char* info = ""); + void writeSerializationId(); + size_t current_pos_ = 0; + std::unordered_set files_written_; + std::unique_ptr ar_; + std::string archive_name_; + std::string archive_name_plus_slash_; + std::string padding_; + std::ofstream file_stream_; + std::function writer_func_; + uint64_t combined_uncomp_crc32_ = 0; + std::string serialization_id_; + + // This number will be updated when the model has operators + // that have valid upgraders. 
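// (Editorial sketch, not part of the upstream header: how a caller might write a
//  record and read it back in place, assuming the corresponding .cc implementation
//  is linked in. The archive path, record name, and buffer sizes are hypothetical;
//  the writer and reader resolve the same relative record name internally.)
//
//    caffe2::serialize::PyTorchStreamWriter writer("model.pt");
//    std::vector<char> payload(1024, 0);                  // hypothetical tensor bytes
//    writer.writeRecord("data/0", payload.data(), payload.size());
//    writer.writeEndOfFile();
//
//    caffe2::serialize::PyTorchStreamReader reader("model.pt");
//    size_t n = reader.getRecordSize("data/0");
//    std::vector<char> out(n);
//    reader.getRecord("data/0", out.data(), n);           // in-place, uncompressed read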
+ uint64_t version_ = kMinProducedFileFormatVersion; + bool finalized_ = false; + bool err_seen_ = false; + friend size_t ostream_write_func( + void* pOpaque, + uint64_t file_ofs, + const void* pBuf, + size_t n); +}; + +namespace detail { +// Writer-specific constants +constexpr uint64_t kFieldAlignment = 64; + +// Returns a record to be appended to the local user extra data entry in order +// to make data beginning aligned at kFieldAlignment bytes boundary. +size_t getPadding( + size_t cursor, + size_t filename_size, + size_t size, + std::string& padding_buf); +} // namespace detail + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..680c288a15f2e89d5a3b8b51d8aaddf44f727d19 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include "c10/macros/Macros.h" +#include "caffe2/serialize/read_adapter_interface.h" + +namespace caffe2 { +namespace serialize { + +// this is a reader implemented by std::istream +class TORCH_API IStreamAdapter final : public ReadAdapterInterface { + public: + C10_DISABLE_COPY_AND_ASSIGN(IStreamAdapter); + explicit IStreamAdapter(std::istream* istream); + size_t size() const override; + size_t read(uint64_t pos, void* buf, size_t n, const char* what = "") + const override; + ~IStreamAdapter() override; + + private: + std::istream* istream_; + void validate(const char* what) const; +}; + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..0a6b5b74a762e7c90b64a6f93717802f658dd164 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +#include "c10/macros/Macros.h" + +namespace caffe2 { +namespace serialize { + +// this is the interface for the (file/stream/memory) reader in +// PyTorchStreamReader. with this interface, we can extend the support +// besides standard istream +class TORCH_API ReadAdapterInterface { + public: + virtual size_t size() const = 0; + virtual size_t read(uint64_t pos, void* buf, size_t n, const char* what = "") + const = 0; + virtual ~ReadAdapterInterface(); +}; + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/versions.h b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/versions.h new file mode 100644 index 0000000000000000000000000000000000000000..6e2c27adc8fae8fdb07dcd9dafe72c503c243c2b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/versions.h @@ -0,0 +1,133 @@ +#pragma once +#include + +namespace caffe2 { +namespace serialize { + +constexpr uint64_t kMinSupportedFileFormatVersion = 0x1L; + +constexpr uint64_t kMaxSupportedFileFormatVersion = 0xAL; + +// Versions (i.e. why was the version number bumped?) + +// Note [Dynamic Versions and torch.jit.save vs. torch.save] +// +// Our versioning scheme has a "produced file format version" which +// describes how an archive is to be read. 
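// (Editorial aside, not upstream text: the usual consumer of this scheme is a
//  loader-side range check against the constants defined above; `reader` stands
//  for a PyTorchStreamReader instance and is illustrative only.)
//
//    uint64_t v = reader.version();
//    if (v < kMinSupportedFileFormatVersion || v > kMaxSupportedFileFormatVersion) {
//      throw std::runtime_error("unsupported archive file format version");
//    }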
The version written in an archive +// is at least this current produced file format version, but may be greater +// if it includes certain symbols. We refer to these conditional versions +// as "dynamic," since they are identified at runtime. +// +// Dynamic versioning is useful when an operator's semantics are updated. +// When using torch.jit.save we want those semantics to be preserved. If +// we bumped the produced file format version on every change, however, +// then older versions of PyTorch couldn't read even simple archives, like +// a single tensor, from newer versions of PyTorch. Instead, we +// assign dynamic versions to these changes that override the +// produced file format version as needed. That is, when the semantics +// of torch.div changed it was assigned dynamic version 4, and when +// torch.jit.saving modules that use torch.div those archives also have +// (at least) version 4. This prevents earlier versions of PyTorch +// from accidentally performing the wrong kind of division. Modules +// that don't use torch.div or other operators with dynamic versions +// can write the produced file format version, and these programs will +// run as expected on earlier versions of PyTorch. +// +// While torch.jit.save attempts to preserve operator semantics, +// torch.save does not. torch.save is analogous to pickling Python, so +// a function that uses torch.div will have different behavior if torch.saved +// and torch.loaded across PyTorch versions. From a technical perspective, +// torch.save ignores dynamic versioning. + +// 1. Initial version +// 2. Removed op_version_set version numbers +// 3. Added type tags to pickle serialization of container types +// 4. (Dynamic) Stopped integer division using torch.div +// (a versioned symbol preserves the historic behavior of versions 1--3) +// 5. (Dynamic) Stops torch.full inferring a floating point dtype +// when given bool or integer fill values. +// 6. Write version string to `./data/version` instead of `version`. + +// [12/15/2021] +// kProducedFileFormatVersion is set to 7 from 3 due to a different +// interpretation of what file format version is. +// Whenever there is new upgrader introduced, +// this number should be bumped. +// The reasons that version is bumped in the past: +// 1. aten::div is changed at version 4 +// 2. aten::full is changed at version 5 +// 3. torch.package uses version 6 +// 4. Introduce new upgrader design and set the version number to 7 +// mark this change +// -------------------------------------------------- +// We describe new operator version bump reasons here: +// 1) [01/24/2022] +// We bump the version number to 8 to update aten::linspace +// and aten::linspace.out to error out when steps is not +// provided. (see: https://github.com/pytorch/pytorch/issues/55951) +// 2) [01/30/2022] +// Bump the version number to 9 to update aten::logspace and +// and aten::logspace.out to error out when steps is not +// provided. (see: https://github.com/pytorch/pytorch/issues/55951) +// 3) [02/11/2022] +// Bump the version number to 10 to update aten::gelu and +// and aten::gelu.out to support the new approximate kwarg. +// (see: https://github.com/pytorch/pytorch/pull/61439) +constexpr uint64_t kProducedFileFormatVersion = 0xAL; + +// Absolute minimum version we will write packages. This +// means that every package from now on will always be +// greater than this number. +constexpr uint64_t kMinProducedFileFormatVersion = 0x3L; + +// The version we write when the archive contains bytecode. 
+// It must be higher or eq to kProducedFileFormatVersion. +// Because torchscript changes is likely introduce bytecode change. +// If kProducedFileFormatVersion is increased, kProducedBytecodeVersion +// should be increased too. The relationship is: +// kMaxSupportedFileFormatVersion >= (most likely ==) kProducedBytecodeVersion +// >= kProducedFileFormatVersion +// If a format change is forward compatible (still readable by older +// executables), we will not increment the version number, to minimize the +// risk of breaking existing clients. TODO: A better way would be to allow +// the caller that creates a model to specify a maximum version that its +// clients can accept. +// Versions: +// 0x1L: Initial version +// 0x2L: (Comment missing) +// 0x3L: (Comment missing) +// 0x4L: (update) Added schema to function tuple. Forward-compatible change. +// 0x5L: (update) Update bytecode is sharing constant tensor files from +// torchscript, and only serialize extra tensors that are not in the +// torchscript constant table. Also update tensor storage schema adapting to +// the unify format, the root key of tensor storage is updated from {index} to +// {the_pointer_value_the_tensor.storage}, for example: +// `140245072983168.storage` Forward-compatibility change. +// 0x6L: Implicit opereator versioning using number of specified argument. +// Refer to the summary of https://github.com/pytorch/pytorch/pull/56845 for +// details. +// 0x7L: Enable support for operators with default arguments plus out +// arguments. Refer. See https://github.com/pytorch/pytorch/pull/63651 for +// details. +// 0x8L: Emit promoted operators as instructions. See +// https://github.com/pytorch/pytorch/pull/71662 for details. +// 0x9L: Change serialization format from pickle to format This version is to +// serve migration. v8 pickle and v9 flatbuffer are the same. Refer to the +// summary of https://github.com/pytorch/pytorch/pull/75201 for more details. +constexpr uint64_t kProducedBytecodeVersion = 0x8L; + +// static_assert( +// kProducedBytecodeVersion >= kProducedFileFormatVersion, +// "kProducedBytecodeVersion must be higher or equal to +// kProducedFileFormatVersion."); + +// Introduce kMinSupportedBytecodeVersion and kMaxSupportedBytecodeVersion +// for limited backward/forward compatibility support of bytecode. If +// kMinSupportedBytecodeVersion <= model_version <= kMaxSupportedBytecodeVersion +// (in loader), we should support this model_version. For example, we provide a +// wrapper to handle an updated operator. +constexpr uint64_t kMinSupportedBytecodeVersion = 0x4L; +constexpr uint64_t kMaxSupportedBytecodeVersion = 0x9L; + +} // namespace serialize +} // namespace caffe2 diff --git a/.venv/lib/python3.11/site-packages/torch/include/clog.h b/.venv/lib/python3.11/site-packages/torch/include/clog.h new file mode 100644 index 0000000000000000000000000000000000000000..bf09cd0cb6de4ff632807ad2e58df9e402906878 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/clog.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#define CLOG_NONE 0 +#define CLOG_FATAL 1 +#define CLOG_ERROR 2 +#define CLOG_WARNING 3 +#define CLOG_INFO 4 +#define CLOG_DEBUG 5 + +#ifndef CLOG_VISIBILITY +#if defined(__ELF__) +#define CLOG_VISIBILITY __attribute__((__visibility__("internal"))) +#elif defined(__MACH__) +#define CLOG_VISIBILITY __attribute__((__visibility__("hidden"))) +#else +#define CLOG_VISIBILITY +#endif +#endif + +#ifndef CLOG_ARGUMENTS_FORMAT +#if defined(__GNUC__) +#define CLOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2))) +#else +#define CLOG_ARGUMENTS_FORMAT +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +CLOG_VISIBILITY void clog_vlog_debug( + const char* module, + const char* format, + va_list args); +CLOG_VISIBILITY void clog_vlog_info( + const char* module, + const char* format, + va_list args); +CLOG_VISIBILITY void clog_vlog_warning( + const char* module, + const char* format, + va_list args); +CLOG_VISIBILITY void clog_vlog_error( + const char* module, + const char* format, + va_list args); +CLOG_VISIBILITY void clog_vlog_fatal( + const char* module, + const char* format, + va_list args); + +#define CLOG_DEFINE_LOG_DEBUG(log_debug_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_debug_function_name(const char* format, ...) { \ + if (level >= CLOG_DEBUG) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_debug(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_INFO(log_info_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_info_function_name(const char* format, ...) { \ + if (level >= CLOG_INFO) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_info(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_WARNING(log_warning_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_warning_function_name(const char* format, ...) { \ + if (level >= CLOG_WARNING) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_warning(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_ERROR(log_error_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_error_function_name(const char* format, ...) { \ + if (level >= CLOG_ERROR) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_error(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_FATAL(log_fatal_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_fatal_function_name(const char* format, ...) 
{ \ + if (level >= CLOG_FATAL) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_fatal(module, format, args); \ + va_end(args); \ + } \ + abort(); \ + } + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/.venv/lib/python3.11/site-packages/torch/include/cpuinfo.h b/.venv/lib/python3.11/site-packages/torch/include/cpuinfo.h new file mode 100644 index 0000000000000000000000000000000000000000..8bb1db4e96470883bcc04a0d803770a3fc093015 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/cpuinfo.h @@ -0,0 +1,2245 @@ +#pragma once +#ifndef CPUINFO_H +#define CPUINFO_H + +#ifndef __cplusplus +#include +#endif + +#ifdef __APPLE__ +#include +#endif + +#include + +/* Identify architecture and define corresponding macro */ + +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) +#define CPUINFO_ARCH_X86 1 +#endif + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define CPUINFO_ARCH_X86_64 1 +#endif + +#if defined(__arm__) || defined(_M_ARM) +#define CPUINFO_ARCH_ARM 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define CPUINFO_ARCH_ARM64 1 +#endif + +#if defined(__PPC64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) +#define CPUINFO_ARCH_PPC64 1 +#endif + +#if defined(__asmjs__) +#define CPUINFO_ARCH_ASMJS 1 +#endif + +#if defined(__wasm__) +#if defined(__wasm_simd128__) +#define CPUINFO_ARCH_WASMSIMD 1 +#else +#define CPUINFO_ARCH_WASM 1 +#endif +#endif + +#if defined(__riscv) +#if (__riscv_xlen == 32) +#define CPUINFO_ARCH_RISCV32 1 +#elif (__riscv_xlen == 64) +#define CPUINFO_ARCH_RISCV64 1 +#endif +#endif + +/* Define other architecture-specific macros as 0 */ + +#ifndef CPUINFO_ARCH_X86 +#define CPUINFO_ARCH_X86 0 +#endif + +#ifndef CPUINFO_ARCH_X86_64 +#define CPUINFO_ARCH_X86_64 0 +#endif + +#ifndef CPUINFO_ARCH_ARM +#define CPUINFO_ARCH_ARM 0 +#endif + +#ifndef CPUINFO_ARCH_ARM64 +#define CPUINFO_ARCH_ARM64 0 +#endif + +#ifndef CPUINFO_ARCH_PPC64 +#define CPUINFO_ARCH_PPC64 0 +#endif + +#ifndef CPUINFO_ARCH_ASMJS +#define CPUINFO_ARCH_ASMJS 0 +#endif + +#ifndef CPUINFO_ARCH_WASM +#define CPUINFO_ARCH_WASM 0 +#endif + +#ifndef CPUINFO_ARCH_WASMSIMD +#define CPUINFO_ARCH_WASMSIMD 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV32 +#define CPUINFO_ARCH_RISCV32 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV64 +#define CPUINFO_ARCH_RISCV64 0 +#endif + +#if CPUINFO_ARCH_X86 && defined(_MSC_VER) +#define CPUINFO_ABI __cdecl +#elif CPUINFO_ARCH_X86 && defined(__GNUC__) +#define CPUINFO_ABI __attribute__((__cdecl__)) +#else +#define CPUINFO_ABI +#endif + +#define CPUINFO_CACHE_UNIFIED 0x00000001 +#define CPUINFO_CACHE_INCLUSIVE 0x00000002 +#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 + +struct cpuinfo_cache { + /** Cache size in bytes */ + uint32_t size; + /** Number of ways of associativity */ + uint32_t associativity; + /** Number of sets */ + uint32_t sets; + /** Number of partitions */ + uint32_t partitions; + /** Line size in bytes */ + uint32_t line_size; + /** + * Binary characteristics of the cache (unified cache, inclusive cache, + * cache with complex indexing). 
+ * + * @see CPUINFO_CACHE_UNIFIED, CPUINFO_CACHE_INCLUSIVE, + * CPUINFO_CACHE_COMPLEX_INDEXING + */ + uint32_t flags; + /** Index of the first logical processor that shares this cache */ + uint32_t processor_start; + /** Number of logical processors that share this cache */ + uint32_t processor_count; +}; + +struct cpuinfo_trace_cache { + uint32_t uops; + uint32_t associativity; +}; + +#define CPUINFO_PAGE_SIZE_4KB 0x1000 +#define CPUINFO_PAGE_SIZE_1MB 0x100000 +#define CPUINFO_PAGE_SIZE_2MB 0x200000 +#define CPUINFO_PAGE_SIZE_4MB 0x400000 +#define CPUINFO_PAGE_SIZE_16MB 0x1000000 +#define CPUINFO_PAGE_SIZE_1GB 0x40000000 + +struct cpuinfo_tlb { + uint32_t entries; + uint32_t associativity; + uint64_t pages; +}; + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed + to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor + * microarchitectures. + * + * Sold its ARM design subsidiary in 2006. The last ARM processor design + * was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor + microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendir if ARM64 processor + microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor + microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC + processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor + * microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA + * Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor + * microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. + */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor + * of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. + */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. Vendor of ARM processor + * microarchitectures. 
+ */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor + microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor + microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation + * to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was + * released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor + * microarchitectures. + * + * Sold its x86 design subsidiary in 2001. The last processor design was + * released in 2001. + */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 + * instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor + * microarchitectures. + * + * Ceased x86 in the early 1990s. The last processor design was released + * in 1991. Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor + * microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released + * in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +/** + * Processor microarchitecture + * + * Processors with different microarchitectures often have different instruction + * performance characteristics, and may have dramatically different pipeline + * organization. + */ +enum cpuinfo_uarch { + /** Microarchitecture is unknown, or the library failed to get + information about the microarchitecture from OS */ + cpuinfo_uarch_unknown = 0, + + /** Pentium and Pentium MMX microarchitecture. */ + cpuinfo_uarch_p5 = 0x00100100, + /** Intel Quark microarchitecture. */ + cpuinfo_uarch_quark = 0x00100101, + + /** Pentium Pro, Pentium II, and Pentium III. */ + cpuinfo_uarch_p6 = 0x00100200, + /** Pentium M. */ + cpuinfo_uarch_dothan = 0x00100201, + /** Intel Core microarchitecture. */ + cpuinfo_uarch_yonah = 0x00100202, + /** Intel Core 2 microarchitecture on 65 nm process. */ + cpuinfo_uarch_conroe = 0x00100203, + /** Intel Core 2 microarchitecture on 45 nm process. */ + cpuinfo_uarch_penryn = 0x00100204, + /** Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st + gen). */ + cpuinfo_uarch_nehalem = 0x00100205, + /** Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ + cpuinfo_uarch_sandy_bridge = 0x00100206, + /** Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). 
*/ + cpuinfo_uarch_ivy_bridge = 0x00100207, + /** Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ + cpuinfo_uarch_haswell = 0x00100208, + /** Intel Broadwell microarchitecture. */ + cpuinfo_uarch_broadwell = 0x00100209, + /** Intel Sky Lake microarchitecture (14 nm, including + Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */ + cpuinfo_uarch_sky_lake = 0x0010020A, + /** DEPRECATED (Intel Kaby Lake microarchitecture). */ + cpuinfo_uarch_kaby_lake = 0x0010020A, + /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */ + cpuinfo_uarch_palm_cove = 0x0010020B, + /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ + cpuinfo_uarch_sunny_cove = 0x0010020C, + + /** Pentium 4 with Willamette, Northwood, or Foster cores. */ + cpuinfo_uarch_willamette = 0x00100300, + /** Pentium 4 with Prescott and later cores. */ + cpuinfo_uarch_prescott = 0x00100301, + + /** Intel Atom on 45 nm process. */ + cpuinfo_uarch_bonnell = 0x00100400, + /** Intel Atom on 32 nm process. */ + cpuinfo_uarch_saltwell = 0x00100401, + /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ + cpuinfo_uarch_silvermont = 0x00100402, + /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ + cpuinfo_uarch_airmont = 0x00100403, + /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */ + cpuinfo_uarch_goldmont = 0x00100404, + /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ + cpuinfo_uarch_goldmont_plus = 0x00100405, + + /** Intel Knights Ferry HPC boards. */ + cpuinfo_uarch_knights_ferry = 0x00100500, + /** Intel Knights Corner HPC boards (aka Xeon Phi). */ + cpuinfo_uarch_knights_corner = 0x00100501, + /** Intel Knights Landing microarchitecture (second-gen MIC). */ + cpuinfo_uarch_knights_landing = 0x00100502, + /** Intel Knights Hill microarchitecture (third-gen MIC). */ + cpuinfo_uarch_knights_hill = 0x00100503, + /** Intel Knights Mill Xeon Phi. */ + cpuinfo_uarch_knights_mill = 0x00100504, + + /** Intel/Marvell XScale series. */ + cpuinfo_uarch_xscale = 0x00100600, + + /** AMD K5. */ + cpuinfo_uarch_k5 = 0x00200100, + /** AMD K6 and alike. */ + cpuinfo_uarch_k6 = 0x00200101, + /** AMD Athlon and Duron. */ + cpuinfo_uarch_k7 = 0x00200102, + /** AMD Athlon 64, Opteron 64. */ + cpuinfo_uarch_k8 = 0x00200103, + /** AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ + cpuinfo_uarch_k10 = 0x00200104, + /** + * AMD Bulldozer microarchitecture + * Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. + */ + cpuinfo_uarch_bulldozer = 0x00200105, + /** + * AMD Piledriver microarchitecture + * Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu + * Dhabi Opteron CPUs. + */ + cpuinfo_uarch_piledriver = 0x00200106, + /** AMD Steamroller microarchitecture (Kaveri APUs). */ + cpuinfo_uarch_steamroller = 0x00200107, + /** AMD Excavator microarchitecture (Carizzo APUs). */ + cpuinfo_uarch_excavator = 0x00200108, + /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen = 0x00200109, + /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen2 = 0x0020010A, + /** AMD Zen 3 microarchitecture. */ + cpuinfo_uarch_zen3 = 0x0020010B, + /** AMD Zen 4 microarchitecture. */ + cpuinfo_uarch_zen4 = 0x0020010C, + + /** NSC Geode and AMD Geode GX and LX. */ + cpuinfo_uarch_geode = 0x00200200, + /** AMD Bobcat mobile microarchitecture. */ + cpuinfo_uarch_bobcat = 0x00200201, + /** AMD Jaguar mobile microarchitecture. 
*/ + cpuinfo_uarch_jaguar = 0x00200202, + /** AMD Puma mobile microarchitecture. */ + cpuinfo_uarch_puma = 0x00200203, + + /** ARM7 series. */ + cpuinfo_uarch_arm7 = 0x00300100, + /** ARM9 series. */ + cpuinfo_uarch_arm9 = 0x00300101, + /** ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ + cpuinfo_uarch_arm11 = 0x00300102, + + /** ARM Cortex-A5. */ + cpuinfo_uarch_cortex_a5 = 0x00300205, + /** ARM Cortex-A7. */ + cpuinfo_uarch_cortex_a7 = 0x00300207, + /** ARM Cortex-A8. */ + cpuinfo_uarch_cortex_a8 = 0x00300208, + /** ARM Cortex-A9. */ + cpuinfo_uarch_cortex_a9 = 0x00300209, + /** ARM Cortex-A12. */ + cpuinfo_uarch_cortex_a12 = 0x00300212, + /** ARM Cortex-A15. */ + cpuinfo_uarch_cortex_a15 = 0x00300215, + /** ARM Cortex-A17. */ + cpuinfo_uarch_cortex_a17 = 0x00300217, + + /** ARM Cortex-A32. */ + cpuinfo_uarch_cortex_a32 = 0x00300332, + /** ARM Cortex-A35. */ + cpuinfo_uarch_cortex_a35 = 0x00300335, + /** ARM Cortex-A53. */ + cpuinfo_uarch_cortex_a53 = 0x00300353, + /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities + compared to revision 1+). */ + cpuinfo_uarch_cortex_a55r0 = 0x00300354, + /** ARM Cortex-A55. */ + cpuinfo_uarch_cortex_a55 = 0x00300355, + /** ARM Cortex-A57. */ + cpuinfo_uarch_cortex_a57 = 0x00300357, + /** ARM Cortex-A65. */ + cpuinfo_uarch_cortex_a65 = 0x00300365, + /** ARM Cortex-A72. */ + cpuinfo_uarch_cortex_a72 = 0x00300372, + /** ARM Cortex-A73. */ + cpuinfo_uarch_cortex_a73 = 0x00300373, + /** ARM Cortex-A75. */ + cpuinfo_uarch_cortex_a75 = 0x00300375, + /** ARM Cortex-A76. */ + cpuinfo_uarch_cortex_a76 = 0x00300376, + /** ARM Cortex-A77. */ + cpuinfo_uarch_cortex_a77 = 0x00300377, + /** ARM Cortex-A78. */ + cpuinfo_uarch_cortex_a78 = 0x00300378, + + /** ARM Neoverse N1. */ + cpuinfo_uarch_neoverse_n1 = 0x00300400, + /** ARM Neoverse E1. */ + cpuinfo_uarch_neoverse_e1 = 0x00300401, + /** ARM Neoverse V1. */ + cpuinfo_uarch_neoverse_v1 = 0x00300402, + /** ARM Neoverse N2. */ + cpuinfo_uarch_neoverse_n2 = 0x00300403, + /** ARM Neoverse V2. */ + cpuinfo_uarch_neoverse_v2 = 0x00300404, + + /** ARM Cortex-X1. */ + cpuinfo_uarch_cortex_x1 = 0x00300501, + /** ARM Cortex-X2. */ + cpuinfo_uarch_cortex_x2 = 0x00300502, + /** ARM Cortex-X3. */ + cpuinfo_uarch_cortex_x3 = 0x00300503, + /** ARM Cortex-X4. */ + cpuinfo_uarch_cortex_x4 = 0x00300504, + + /** ARM Cortex-A510. */ + cpuinfo_uarch_cortex_a510 = 0x00300551, + /** ARM Cortex-A520. */ + cpuinfo_uarch_cortex_a520 = 0x00300552, + /** ARM Cortex-A710. */ + cpuinfo_uarch_cortex_a710 = 0x00300571, + /** ARM Cortex-A715. */ + cpuinfo_uarch_cortex_a715 = 0x00300572, + /** ARM Cortex-A720. */ + cpuinfo_uarch_cortex_a720 = 0x00300573, + + /** Qualcomm Scorpion. */ + cpuinfo_uarch_scorpion = 0x00400100, + /** Qualcomm Krait. */ + cpuinfo_uarch_krait = 0x00400101, + /** Qualcomm Kryo. */ + cpuinfo_uarch_kryo = 0x00400102, + /** Qualcomm Falkor. */ + cpuinfo_uarch_falkor = 0x00400103, + /** Qualcomm Saphira. */ + cpuinfo_uarch_saphira = 0x00400104, + + /** Nvidia Denver. */ + cpuinfo_uarch_denver = 0x00500100, + /** Nvidia Denver 2. */ + cpuinfo_uarch_denver2 = 0x00500101, + /** Nvidia Carmel. */ + cpuinfo_uarch_carmel = 0x00500102, + + /** Samsung Exynos M1 (Exynos 8890 big cores). */ + cpuinfo_uarch_exynos_m1 = 0x00600100, + /** Samsung Exynos M2 (Exynos 8895 big cores). */ + cpuinfo_uarch_exynos_m2 = 0x00600101, + /** Samsung Exynos M3 (Exynos 9810 big cores). */ + cpuinfo_uarch_exynos_m3 = 0x00600102, + /** Samsung Exynos M4 (Exynos 9820 big cores). 
*/ + cpuinfo_uarch_exynos_m4 = 0x00600103, + /** Samsung Exynos M5 (Exynos 9830 big cores). */ + cpuinfo_uarch_exynos_m5 = 0x00600104, + + /* Deprecated synonym for Cortex-A76 */ + cpuinfo_uarch_cortex_a76ae = 0x00300376, + /* Deprecated names for Exynos. */ + cpuinfo_uarch_mongoose_m1 = 0x00600100, + cpuinfo_uarch_mongoose_m2 = 0x00600101, + cpuinfo_uarch_meerkat_m3 = 0x00600102, + cpuinfo_uarch_meerkat_m4 = 0x00600103, + + /** Apple A6 and A6X processors. */ + cpuinfo_uarch_swift = 0x00700100, + /** Apple A7 processor. */ + cpuinfo_uarch_cyclone = 0x00700101, + /** Apple A8 and A8X processor. */ + cpuinfo_uarch_typhoon = 0x00700102, + /** Apple A9 and A9X processor. */ + cpuinfo_uarch_twister = 0x00700103, + /** Apple A10 and A10X processor. */ + cpuinfo_uarch_hurricane = 0x00700104, + /** Apple A11 processor (big cores). */ + cpuinfo_uarch_monsoon = 0x00700105, + /** Apple A11 processor (little cores). */ + cpuinfo_uarch_mistral = 0x00700106, + /** Apple A12 processor (big cores). */ + cpuinfo_uarch_vortex = 0x00700107, + /** Apple A12 processor (little cores). */ + cpuinfo_uarch_tempest = 0x00700108, + /** Apple A13 processor (big cores). */ + cpuinfo_uarch_lightning = 0x00700109, + /** Apple A13 processor (little cores). */ + cpuinfo_uarch_thunder = 0x0070010A, + /** Apple A14 / M1 processor (big cores). */ + cpuinfo_uarch_firestorm = 0x0070010B, + /** Apple A14 / M1 processor (little cores). */ + cpuinfo_uarch_icestorm = 0x0070010C, + /** Apple A15 / M2 processor (big cores). */ + cpuinfo_uarch_avalanche = 0x0070010D, + /** Apple A15 / M2 processor (little cores). */ + cpuinfo_uarch_blizzard = 0x0070010E, + + /** Cavium ThunderX. */ + cpuinfo_uarch_thunderx = 0x00800100, + /** Cavium ThunderX2 (originally Broadcom Vulkan). */ + cpuinfo_uarch_thunderx2 = 0x00800200, + + /** Marvell PJ4. */ + cpuinfo_uarch_pj4 = 0x00900100, + + /** Broadcom Brahma B15. */ + cpuinfo_uarch_brahma_b15 = 0x00A00100, + /** Broadcom Brahma B53. */ + cpuinfo_uarch_brahma_b53 = 0x00A00101, + + /** Applied Micro X-Gene. */ + cpuinfo_uarch_xgene = 0x00B00100, + + /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ + cpuinfo_uarch_dhyana = 0x01000100, + + /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */ + cpuinfo_uarch_taishan_v110 = 0x00C00100, +}; + +struct cpuinfo_processor { + /** SMT (hyperthread) ID within a core */ + uint32_t smt_id; + /** Core containing this logical processor */ + const struct cpuinfo_core* core; + /** Cluster of cores containing this logical processor */ + const struct cpuinfo_cluster* cluster; + /** Physical package containing this logical processor */ + const struct cpuinfo_package* package; +#if defined(__linux__) + /** + * Linux-specific ID for the logical processor: + * - Linux kernel exposes information about this logical processor in + * /sys/devices/system/cpu/cpu/ + * - Bit in the cpu_set_t identifies this logical processor + */ + int linux_id; +#endif +#if defined(_WIN32) || defined(__CYGWIN__) + /** Windows-specific ID for the group containing the logical processor. + */ + uint16_t windows_group_id; + /** + * Windows-specific ID of the logical processor within its group: + * - Bit in the KAFFINITY mask identifies this + * logical processor within its group. 
+ */ + uint16_t windows_processor_id; +#endif +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** APIC ID (unique x86-specific ID of the logical processor) */ + uint32_t apic_id; +#endif + struct { + /** Level 1 instruction cache */ + const struct cpuinfo_cache* l1i; + /** Level 1 data cache */ + const struct cpuinfo_cache* l1d; + /** Level 2 unified or data cache */ + const struct cpuinfo_cache* l2; + /** Level 3 unified or data cache */ + const struct cpuinfo_cache* l3; + /** Level 4 unified or data cache */ + const struct cpuinfo_cache* l4; + } cache; +}; + +struct cpuinfo_core { + /** Index of the first logical processor on this core. */ + uint32_t processor_start; + /** Number of logical processors on this core */ + uint32_t processor_count; + /** Core ID within a package */ + uint32_t core_id; + /** Cluster containing this core */ + const struct cpuinfo_cluster* cluster; + /** Physical package containing this core. */ + const struct cpuinfo_package* package; + /** Vendor of the CPU microarchitecture for this core */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture for this core */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for this core */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for this core */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the core, in Hz */ + uint64_t frequency; +}; + +struct cpuinfo_cluster { + /** Index of the first logical processor in the cluster */ + uint32_t processor_start; + /** Number of logical processors in the cluster */ + uint32_t processor_count; + /** Index of the first core in the cluster */ + uint32_t core_start; + /** Number of cores on the cluster */ + uint32_t core_count; + /** Cluster ID within a package */ + uint32_t cluster_id; + /** Physical package containing the cluster */ + const struct cpuinfo_package* package; + /** CPU microarchitecture vendor of the cores in the cluster */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture of the cores in the cluster */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register of the cores in the cluster */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) of the cores in the cluster */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ + uint64_t frequency; +}; + +#define CPUINFO_PACKAGE_NAME_MAX 48 + +struct cpuinfo_package { + /** SoC or processor chip model name */ + char name[CPUINFO_PACKAGE_NAME_MAX]; + /** Index of the first logical processor on this physical package */ + uint32_t processor_start; + /** Number of logical processors on this physical package */ + uint32_t processor_count; + /** Index of the first core on this physical package */ + uint32_t core_start; + /** Number of cores on this physical package */ + uint32_t core_count; + /** Index of the first cluster of cores on this physical package */ + uint32_t cluster_start; + /** Number of clusters of cores on this physical package */ + uint32_t cluster_count; +}; + +struct cpuinfo_uarch_info { + /** Type of CPU microarchitecture */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for the microarchitecture */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for the microarchitecture */ + uint32_t midr; +#endif + /** 
Number of logical processors with the microarchitecture */ + uint32_t processor_count; + /** Number of cores with the microarchitecture */ + uint32_t core_count; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +bool CPUINFO_ABI cpuinfo_initialize(void); + +void CPUINFO_ABI cpuinfo_deinitialize(void); + +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +/* This structure is not a part of stable API. Use cpuinfo_has_x86_* functions + * instead. */ +struct cpuinfo_x86_isa { +#if CPUINFO_ARCH_X86 + bool rdtsc; +#endif + bool rdtscp; + bool rdpid; + bool sysenter; +#if CPUINFO_ARCH_X86 + bool syscall; +#endif + bool msr; + bool clzero; + bool clflush; + bool clflushopt; + bool mwait; + bool mwaitx; +#if CPUINFO_ARCH_X86 + bool emmx; +#endif + bool fxsave; + bool xsave; +#if CPUINFO_ARCH_X86 + bool fpu; + bool mmx; + bool mmx_plus; +#endif + bool three_d_now; + bool three_d_now_plus; +#if CPUINFO_ARCH_X86 + bool three_d_now_geode; +#endif + bool prefetch; + bool prefetchw; + bool prefetchwt1; +#if CPUINFO_ARCH_X86 + bool daz; + bool sse; + bool sse2; +#endif + bool sse3; + bool ssse3; + bool sse4_1; + bool sse4_2; + bool sse4a; + bool misaligned_sse; + bool avx; + bool avxvnni; + bool fma3; + bool fma4; + bool xop; + bool f16c; + bool avx2; + bool avx512f; + bool avx512pf; + bool avx512er; + bool avx512cd; + bool avx512dq; + bool avx512bw; + bool avx512vl; + bool avx512ifma; + bool avx512vbmi; + bool avx512vbmi2; + bool avx512bitalg; + bool avx512vpopcntdq; + bool avx512vnni; + bool avx512bf16; + bool avx512fp16; + bool avx512vp2intersect; + bool avx512_4vnniw; + bool avx512_4fmaps; + bool amx_bf16; + bool amx_tile; + bool amx_int8; + bool amx_fp16; + bool avx_vnni_int8; + bool avx_vnni_int16; + bool avx_ne_convert; + bool hle; + bool rtm; + bool xtest; + bool mpx; +#if CPUINFO_ARCH_X86 + bool cmov; + bool cmpxchg8b; +#endif + bool cmpxchg16b; + bool clwb; + bool movbe; +#if CPUINFO_ARCH_X86_64 + bool lahf_sahf; +#endif + bool fs_gs_base; + bool lzcnt; + bool popcnt; + bool tbm; + bool bmi; + bool bmi2; + bool adx; + bool aes; + bool vaes; + bool pclmulqdq; + bool vpclmulqdq; + bool gfni; + bool rdrand; + bool rdseed; + bool sha; + bool rng; + bool ace; + bool ace2; + bool phe; + bool pmm; + bool lwp; +}; + +extern struct cpuinfo_x86_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_x86_rdtsc(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.rdtsc; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdtscp(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdtscp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdpid(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdpid; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clzero(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clzero; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwait(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwait; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwaitx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwaitx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fxsave(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fxsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xsave(void) { +#if 
CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fpu(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.fpu; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx_plus(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx_plus; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_plus(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now_plus; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_geode(void) { +#if CPUINFO_ARCH_X86_64 + return false; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return false; +#else + return cpuinfo_isa.three_d_now_geode; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetch(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetch; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchwt1(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchwt1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_daz(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.daz; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse2(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse2; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_ssse3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.ssse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_1(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_1; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_2(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_2; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_2; +#else + return false; +#endif +} + +static inline bool 
cpuinfo_has_x86_sse4a(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sse4a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_misaligned_sse(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.misaligned_sse; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avxvnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avxvnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma4(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma4; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xop(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xop; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_f16c(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.f16c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512f(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512pf(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512pf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512er(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512er; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512cd(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512cd; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512dq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512dq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vl(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vl; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512ifma(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512ifma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bitalg(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bitalg; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vpopcntdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vpopcntdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return 
cpuinfo_isa.avx512vnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vp2intersect; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4vnniw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4vnniw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4fmaps(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4fmaps; +#else + return false; +#endif +} + +/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection + * + * I. AMX is a new extensions to the x86 ISA to work on matrices, consists of + * 1) 2-dimentional registers (tiles), hold sub-matrices from larger matrices in memory + * 2) Accelerator called Tile Matrix Multiply (TMUL), contains instructions operating on tiles + * + * II. Platforms that supports AMX: + * +-----------------+-----+----------+----------+----------+----------+ + * | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 | + * +-----------------+-----+----------+----------+----------+----------+ + * | Sapphire Rapids | 4th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Emerald Rapids | 5th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Granite Rapids | 6th | YES | YES | YES | YES | + * +-----------------+-----+----------+----------+----------+----------+ + * + * Reference: https://www.intel.com/content/www/us/en/products/docs + * /accelerator-engines/advanced-matrix-extensions/overview.html + */ +static inline bool cpuinfo_has_x86_amx_bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_tile(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_tile; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_int8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_fp16; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT8 + * Supported Platfroms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int8; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT16 + * Supported Platfroms: Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int16; +#else + return false; +#endif +} + +/* + * A new set of instructions, which can convert low precision floating point + * like BF16/FP16 to high precision floating point FP32, as well as convert FP32 + * elements to BF16. 
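 * (Editorial sketch, not upstream text: typical runtime dispatch built on the
 *  cpuinfo_has_x86_*() helpers declared in this header; cpuinfo_initialize()
 *  must succeed before any query, and the kernel selection shown is hypothetical.)
 *
 *    #include <cpuinfo.h>
 *    if (cpuinfo_initialize()) {
 *      const bool use_amx  = cpuinfo_has_x86_amx_tile() && cpuinfo_has_x86_amx_int8();
 *      const bool use_avx2 = cpuinfo_has_x86_avx2();
 *      // pick an AMX, AVX2, or scalar kernel accordingly
 *    }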
This instruction allows the platform to have improved AI + * capabilities and better compatibility. + * + * Supported Platforms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_ne_convert(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_ne_convert; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_hle(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.hle; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rtm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rtm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xtest(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xtest; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mpx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mpx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmov(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmov; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg8b(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmpxchg8b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg16b(void) { +#if CPUINFO_ARCH_X86_64 + return cpuinfo_isa.cmpxchg16b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clwb(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clwb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_movbe(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.movbe; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_lahf_sahf(void) { +#if CPUINFO_ARCH_X86 + return true; +#elif CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lahf_sahf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_lzcnt(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lzcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_popcnt(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.popcnt; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.popcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_tbm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.tbm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_adx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.adx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_aes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.aes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vaes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vaes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_pclmulqdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.pclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vpclmulqdq(void) { 
+#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vpclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_gfni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.gfni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdrand(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdrand; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdseed(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdseed; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sha(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sha; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +/* This structure is not a part of stable API. Use cpuinfo_has_arm_* functions + * instead. */ +struct cpuinfo_arm_isa { +#if CPUINFO_ARCH_ARM + bool thumb; + bool thumb2; + bool thumbee; + bool jazelle; + bool armv5e; + bool armv6; + bool armv6k; + bool armv7; + bool armv7mp; + bool armv8; + bool idiv; + + bool vfpv2; + bool vfpv3; + bool d32; + bool fp16; + bool fma; + + bool wmmx; + bool wmmx2; + bool neon; +#endif +#if CPUINFO_ARCH_ARM64 + bool atomics; + bool bf16; + bool sve; + bool sve2; + bool i8mm; + bool sme; + uint32_t svelen; +#endif + bool rdm; + bool fp16arith; + bool dot; + bool jscvt; + bool fcma; + bool fhm; + + bool aes; + bool sha1; + bool sha2; + bool pmull; + bool crc32; +}; + +extern struct cpuinfo_arm_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_arm_thumb(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_thumb2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v5e(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv5e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v6(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v6k(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6k; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7mp(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7mp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_idiv(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.idiv; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16; +#else + return false; 
+#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fp16_arith(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fma(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_atomics(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.atomics; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_rdm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.rdm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16_arith(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16arith; +#elif CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fhm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fhm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_dot(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.dot; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_jscvt(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.jscvt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fcma(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fcma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_i8mm(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.i8mm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_aes(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.aes; 
+#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha1(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha2(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_pmull(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.pmull; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_crc32(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.crc32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve && cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve2; +#else + return false; +#endif +} + +// Function to get the max SVE vector length on ARM CPUs which support SVE. +static inline uint32_t cpuinfo_get_max_arm_sve_length(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.svelen * 8; // bytes * 8 = bit length (vector length) +#else + return 0; +#endif +} + +static inline bool cpuinfo_has_arm_sme(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +/* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions + * instead. */ +struct cpuinfo_riscv_isa { + /** + * Keep fields in line with the canonical order as defined by + * Section 27.11 Subset Naming Convention. + */ + /* RV32I/64I/128I Base ISA. */ + bool i; +#if CPUINFO_ARCH_RISCV32 + /* RV32E Base ISA. */ + bool e; +#endif + /* Integer Multiply/Divide Extension. */ + bool m; + /* Atomic Extension. */ + bool a; + /* Single-Precision Floating-Point Extension. */ + bool f; + /* Double-Precision Floating-Point Extension. */ + bool d; + /* Compressed Extension. */ + bool c; + /* Vector Extension. */ + bool v; +}; + +extern struct cpuinfo_riscv_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_riscv_i(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.i; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_e(void) { +#if CPUINFO_ARCH_RISCV32 + return cpuinfo_isa.e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_m(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.m; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_a(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_f(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_d(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.d; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_g(void) { + // The 'G' extension is simply shorthand for 'IMAFD'. 
+ return cpuinfo_has_riscv_i() && cpuinfo_has_riscv_m() && cpuinfo_has_riscv_a() && cpuinfo_has_riscv_f() && + cpuinfo_has_riscv_d(); +} + +static inline bool cpuinfo_has_riscv_c(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_v(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.v; +#else + return false; +#endif +} + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); +const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); +const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_packages(void); +const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarchs(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void); + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processor(uint32_t index); +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_core(uint32_t index); +const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); +const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_package(uint32_t index); +const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index); + +uint32_t CPUINFO_ABI cpuinfo_get_processors_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); + +/** + * Returns upper bound on cache size. + */ +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); + +/** + * Identify the logical processor that executes the current thread. + * + * There is no guarantee that the thread will stay on the same logical processor + * for any time. Callers should treat the result as only a hint, and be prepared + * to handle NULL return value. + */ +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void); + +/** + * Identify the core that executes the current thread. + * + * There is no guarantee that the thread will stay on the same core for any + * time. Callers should treat the result as only a hint, and be prepared to + * handle NULL return value. + */ +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns 0. 
+ * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns the user-specified default value. + * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* CPUINFO_H */ diff --git a/.venv/lib/python3.11/site-packages/torch/include/dnnl.h b/.venv/lib/python3.11/site-packages/torch/include/dnnl.h new file mode 100644 index 0000000000000000000000000000000000000000..bc74bf644f4b628018d7a9103ba63320abc466d5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/dnnl.h @@ -0,0 +1,22 @@ +/******************************************************************************* +* Copyright 2020 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef DNNL_H +#define DNNL_H + +#include "oneapi/dnnl/dnnl.h" + +#endif /* DNNL_H */ diff --git a/.venv/lib/python3.11/site-packages/torch/include/dnnl_sycl_types.h b/.venv/lib/python3.11/site-packages/torch/include/dnnl_sycl_types.h new file mode 100644 index 0000000000000000000000000000000000000000..a4a854a4cf138103f4c53030083e119cc0732cf1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/dnnl_sycl_types.h @@ -0,0 +1,22 @@ +/******************************************************************************* +* Copyright 2020 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
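The following is an illustrative usage sketch, not part of any of the vendored headers in this diff: it shows how an application would typically consume the cpuinfo API declared above. It assumes cpuinfo_initialize() (declared earlier in cpuinfo.h, outside this excerpt) and that <cpuinfo.h> is on the include path; every other call appears in the declarations above.

#include <stdio.h>
#include <cpuinfo.h>

int main(void) {
    /* Populate cpuinfo's internal tables before querying feature or topology APIs.
     * cpuinfo_initialize() is assumed from earlier in cpuinfo.h (not shown here). */
    if (!cpuinfo_initialize()) {
        fprintf(stderr, "cpuinfo initialization failed\n");
        return 1;
    }

    /* ISA checks read a field of cpuinfo_isa on the matching architecture
     * and compile to a constant false everywhere else. */
    printf("AVX2:     %d\n", cpuinfo_has_x86_avx2());
    printf("AMX-BF16: %d\n", cpuinfo_has_x86_amx_bf16());
    printf("NEON:     %d\n", cpuinfo_has_arm_neon());

    /* Topology and cache queries. */
    printf("logical processors: %u\n", cpuinfo_get_processors_count());
    printf("max cache size:     %u bytes\n", cpuinfo_get_max_cache_size());

    /* The current-uarch index is only a hint; fall back to 0 when unsupported. */
    printf("current uarch index: %u\n", cpuinfo_get_current_uarch_index_with_default(0));
    return 0;
}

Because the non-matching architecture branches return a constant false, the same code compiles unchanged on x86, ARM, and RISC-V builds.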
+*******************************************************************************/ + +#ifndef DNNL_SYCL_TYPES_H +#define DNNL_SYCL_TYPES_H + +#include "oneapi/dnnl/dnnl_sycl_types.h" + +#endif /* DNNL_SYCL_TYPES_H */ diff --git a/.venv/lib/python3.11/site-packages/torch/include/psimd.h b/.venv/lib/python3.11/site-packages/torch/include/psimd.h new file mode 100644 index 0000000000000000000000000000000000000000..b7cb65d799c98931a73b3184511b1bd8c2b30ec0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/psimd.h @@ -0,0 +1,1384 @@ +#pragma once +#ifndef PSIMD_H +#define PSIMD_H + +#if defined(__CUDA_ARCH__) + /* CUDA compiler */ + #define PSIMD_INTRINSIC __forceinline__ __device__ +#elif defined(__OPENCL_VERSION__) + /* OpenCL compiler */ + #define PSIMD_INTRINSIC inline static +#elif defined(__INTEL_COMPILER) + /* Intel compiler, even on Windows */ + #define PSIMD_INTRINSIC inline static __attribute__((__always_inline__)) +#elif defined(__GNUC__) + /* GCC-compatible compiler (gcc/clang/icc) */ + #define PSIMD_INTRINSIC inline static __attribute__((__always_inline__)) +#elif defined(_MSC_VER) + /* MSVC-compatible compiler (cl/icl/clang-cl) */ + #define PSIMD_INTRINSIC __forceinline static +#elif defined(__cplusplus) + /* Generic C++ compiler */ + #define PSIMD_INTRINSIC inline static +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + /* Generic C99 compiler */ + #define PSIMD_INTRINSIC inline static +#else + /* Generic C compiler */ + #define PSIMD_INTRINSIC static +#endif + +#if defined(__GNUC__) || defined(__clang__) + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + #include <arm_neon.h> + #endif + + #if defined(__SSE2__) + #include <emmintrin.h> + #endif + + #if defined(__SSE3__) + #include <pmmintrin.h> + #endif + + #if defined(__SSSE3__) + #include <tmmintrin.h> + #endif + + #if defined(__SSE4_1__) + #include <smmintrin.h> + #endif + + #if defined(__SSE4_2__) + #include <nmmintrin.h> + #endif + + #if defined(__AVX__) + #include <immintrin.h> + #endif +#elif defined(_MSC_VER) + #include <intrin.h> +#endif + +#if defined(__cplusplus) + #define PSIMD_CXX_SYNTAX +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define PSIMD_C11_SYNTAX +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #define PSIMD_C99_SYNTAX +#else + #define PSIMD_C89_SYNTAX +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) + #include <cstddef> + #include <cstdint> +#elif !defined(__OPENCL_VERSION__) + #include <stddef.h> + #include <stdint.h> +#endif + +#if defined(__GNUC__) || defined(__clang__) + #define PSIMD_HAVE_F64 0 + #define PSIMD_HAVE_F32 1 + #define PSIMD_HAVE_U8 1 + #define PSIMD_HAVE_S8 1 + #define PSIMD_HAVE_U16 1 + #define PSIMD_HAVE_S16 1 + #define PSIMD_HAVE_U32 1 + #define PSIMD_HAVE_S32 1 + #define PSIMD_HAVE_U64 0 + #define PSIMD_HAVE_S64 0 + + typedef int8_t psimd_s8 __attribute__((vector_size(16), aligned(1))); + typedef uint8_t psimd_u8 __attribute__((vector_size(16), aligned(1))); + typedef int16_t psimd_s16 __attribute__((vector_size(16), aligned(2))); + typedef uint16_t psimd_u16 __attribute__((vector_size(16), aligned(2))); + typedef int32_t psimd_s32 __attribute__((vector_size(16), aligned(4))); + typedef uint32_t psimd_u32 __attribute__((vector_size(16), aligned(4))); + typedef float psimd_f32 __attribute__((vector_size(16), aligned(4))); + + typedef struct { + psimd_s8 lo; + psimd_s8 hi; + } psimd_s8x2; + + typedef struct { + psimd_u8 lo; + psimd_u8 hi; + } psimd_u8x2; + + typedef struct { + psimd_s16 lo; + psimd_s16 hi; + } psimd_s16x2; + + typedef struct { + psimd_u16 lo; + psimd_u16 hi; + } psimd_u16x2; + + typedef struct { + psimd_s32 lo; + psimd_s32 
hi; + } psimd_s32x2; + + typedef struct { + psimd_u32 lo; + psimd_u32 hi; + } psimd_u32x2; + + typedef struct { + psimd_f32 lo; + psimd_f32 hi; + } psimd_f32x2; + + /* Bit casts */ + PSIMD_INTRINSIC psimd_u32x2 psimd_cast_s32x2_u32x2(psimd_s32x2 v) { + return (psimd_u32x2) { .lo = (psimd_u32) v.lo, .hi = (psimd_u32) v.hi }; + } + + PSIMD_INTRINSIC psimd_f32x2 psimd_cast_s32x2_f32x2(psimd_s32x2 v) { + return (psimd_f32x2) { .lo = (psimd_f32) v.lo, .hi = (psimd_f32) v.hi }; + } + + PSIMD_INTRINSIC psimd_s32x2 psimd_cast_u32x2_s32x2(psimd_u32x2 v) { + return (psimd_s32x2) { .lo = (psimd_s32) v.lo, .hi = (psimd_s32) v.hi }; + } + + PSIMD_INTRINSIC psimd_f32x2 psimd_cast_u32x2_f32x2(psimd_u32x2 v) { + return (psimd_f32x2) { .lo = (psimd_f32) v.lo, .hi = (psimd_f32) v.hi }; + } + + PSIMD_INTRINSIC psimd_s32x2 psimd_cast_f32x2_s32x2(psimd_f32x2 v) { + return (psimd_s32x2) { .lo = (psimd_s32) v.lo, .hi = (psimd_s32) v.hi }; + } + + PSIMD_INTRINSIC psimd_u32x2 psimd_cast_f32x2_u32x2(psimd_f32x2 v) { + return (psimd_u32x2) { .lo = (psimd_u32) v.lo, .hi = (psimd_u32) v.hi }; + } + + /* Swap */ + PSIMD_INTRINSIC void psimd_swap_s8(psimd_s8 a[1], psimd_s8 b[1]) { + const psimd_s8 new_a = *b; + const psimd_s8 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u8(psimd_u8 a[1], psimd_u8 b[1]) { + const psimd_u8 new_a = *b; + const psimd_u8 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_s16(psimd_s16 a[1], psimd_s16 b[1]) { + const psimd_s16 new_a = *b; + const psimd_s16 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u16(psimd_u16 a[1], psimd_u16 b[1]) { + const psimd_u16 new_a = *b; + const psimd_u16 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_s32(psimd_s32 a[1], psimd_s32 b[1]) { + const psimd_s32 new_a = *b; + const psimd_s32 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u32(psimd_u32 a[1], psimd_u32 b[1]) { + const psimd_u32 new_a = *b; + const psimd_u32 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_f32(psimd_f32 a[1], psimd_f32 b[1]) { + const psimd_f32 new_a = *b; + const psimd_f32 new_b = *a; + *a = new_a; + *b = new_b; + } + + /* Zero-initialization */ + PSIMD_INTRINSIC psimd_s8 psimd_zero_s8(void) { + return (psimd_s8) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u8 psimd_zero_u8(void) { + return (psimd_u8) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_s16 psimd_zero_s16(void) { + return (psimd_s16) { 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u16 psimd_zero_u16(void) { + return (psimd_u16) { 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_zero_s32(void) { + return (psimd_s32) { 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_zero_u32(void) { + return (psimd_u32) { 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_zero_f32(void) { + return (psimd_f32) { 0.0f, 0.0f, 0.0f, 0.0f }; + } + + /* Initialization to the same constant */ + PSIMD_INTRINSIC psimd_s8 psimd_splat_s8(int8_t c) { + return (psimd_s8) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u8 psimd_splat_u8(uint8_t c) { + return (psimd_u8) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_s16 psimd_splat_s16(int16_t c) { + return (psimd_s16) { c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u16 psimd_splat_u16(uint16_t c) { + return (psimd_u16) { c, c, c, c, 
c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_splat_s32(int32_t c) { + return (psimd_s32) { c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_splat_u32(uint32_t c) { + return (psimd_u32) { c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat_f32(float c) { + return (psimd_f32) { c, c, c, c }; + } + + /* Load vector */ + PSIMD_INTRINSIC psimd_s8 psimd_load_s8(const void* address) { + return *((const psimd_s8*) address); + } + + PSIMD_INTRINSIC psimd_u8 psimd_load_u8(const void* address) { + return *((const psimd_u8*) address); + } + + PSIMD_INTRINSIC psimd_s16 psimd_load_s16(const void* address) { + return *((const psimd_s16*) address); + } + + PSIMD_INTRINSIC psimd_u16 psimd_load_u16(const void* address) { + return *((const psimd_u16*) address); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load_s32(const void* address) { + return *((const psimd_s32*) address); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load_u32(const void* address) { + return *((const psimd_u32*) address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_f32(const void* address) { + return *((const psimd_f32*) address); + } + + PSIMD_INTRINSIC psimd_s8 psimd_load_splat_s8(const void* address) { + return psimd_splat_s8(*((const int8_t*) address)); + } + + PSIMD_INTRINSIC psimd_u8 psimd_load_splat_u8(const void* address) { + return psimd_splat_u8(*((const uint8_t*) address)); + } + + PSIMD_INTRINSIC psimd_s16 psimd_load_splat_s16(const void* address) { + return psimd_splat_s16(*((const int16_t*) address)); + } + + PSIMD_INTRINSIC psimd_u16 psimd_load_splat_u16(const void* address) { + return psimd_splat_u16(*((const uint16_t*) address)); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load_splat_s32(const void* address) { + return psimd_splat_s32(*((const int32_t*) address)); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load_splat_u32(const void* address) { + return psimd_splat_u32(*((const uint32_t*) address)); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_splat_f32(const void* address) { + return psimd_splat_f32(*((const float*) address)); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load1_s32(const void* address) { + return (psimd_s32) { *((const int32_t*) address), 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load1_u32(const void* address) { + return (psimd_u32) { *((const uint32_t*) address), 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load1_f32(const void* address) { + return (psimd_f32) { *((const float*) address), 0.0f, 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load2_s32(const void* address) { + const int32_t* address_s32 = (const int32_t*) address; + return (psimd_s32) { address_s32[0], address_s32[1], 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load2_u32(const void* address) { + const uint32_t* address_u32 = (const uint32_t*) address; + return (psimd_u32) { address_u32[0], address_u32[1], 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load2_f32(const void* address) { + const float* address_f32 = (const float*) address; + return (psimd_f32) { address_f32[0], address_f32[1], 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load3_s32(const void* address) { + const int32_t* address_s32 = (const int32_t*) address; + return (psimd_s32) { address_s32[0], address_s32[1], address_s32[2], 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load3_u32(const void* address) { + const uint32_t* address_u32 = (const uint32_t*) address; + return (psimd_u32) { address_u32[0], address_u32[1], address_u32[2], 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load3_f32(const void* address) { + const float* address_f32 = 
(const float*) address; + return (psimd_f32) { address_f32[0], address_f32[1], address_f32[2], 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load4_s32(const void* address) { + return psimd_load_s32(address); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load4_u32(const void* address) { + return psimd_load_u32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load4_f32(const void* address) { + return psimd_load_f32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_stride2_f32(const void* address) { + const psimd_f32 v0x1x = psimd_load_f32(address); + const psimd_f32 vx2x3 = psimd_load_f32((const float*) address + 3); + #if defined(__clang__) + return __builtin_shufflevector(v0x1x, vx2x3, 0, 2, 5, 7); + #else + return __builtin_shuffle(v0x1x, vx2x3, (psimd_s32) { 0, 2, 5, 7 }); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_load1_stride2_f32(const void* address) { + return psimd_load_f32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load2_stride2_f32(const void* address) { + const float* address_f32 = (const float*) address; + return (psimd_f32) { address_f32[0], address_f32[2], 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load3_stride2_f32(const void* address) { + const psimd_f32 v0x1x = psimd_load_f32(address); + const psimd_f32 v2zzz = psimd_load1_f32((const float*) address + 2); + #if defined(__clang__) + return __builtin_shufflevector(v0x1x, v2zzz, 0, 2, 4, 6); + #else + return __builtin_shuffle(v0x1x, v2zzz, (psimd_s32) { 0, 2, 4, 6 }); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_load4_stride2_f32(const void* address) { + return psimd_load_stride2_f32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_stride_f32(const void* address, size_t stride) { + const float* address0_f32 = (const float*) address; + const float* address1_f32 = address0_f32 + stride; + const float* address2_f32 = address1_f32 + stride; + const float* address3_f32 = address2_f32 + stride; + return (psimd_f32) { *address0_f32, *address1_f32, *address2_f32, *address3_f32 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load1_stride_f32(const void* address, size_t stride) { + return psimd_load1_f32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load2_stride_f32(const void* address, size_t stride) { + const float* address_f32 = (const float*) address; + return (psimd_f32) { address_f32[0], address_f32[stride], 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load3_stride_f32(const void* address, size_t stride) { + const float* address0_f32 = (const float*) address; + const float* address1_f32 = address0_f32 + stride; + const float* address2_f32 = address1_f32 + stride; + return (psimd_f32) { *address0_f32, *address1_f32, *address2_f32, 0.0f }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load4_stride_f32(const void* address, size_t stride) { + return psimd_load_stride_f32(address, stride); + } + + /* Store vector */ + PSIMD_INTRINSIC void psimd_store_s8(void* address, psimd_s8 value) { + *((psimd_s8*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store_u8(void* address, psimd_u8 value) { + *((psimd_u8*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store_s16(void* address, psimd_s16 value) { + *((psimd_s16*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store_u16(void* address, psimd_u16 value) { + *((psimd_u16*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store_s32(void* address, psimd_s32 value) { + *((psimd_s32*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store_u32(void* address, psimd_u32 value) { + *((psimd_u32*) address) = value; + } + + PSIMD_INTRINSIC 
void psimd_store_f32(void* address, psimd_f32 value) { + *((psimd_f32*) address) = value; + } + + PSIMD_INTRINSIC void psimd_store1_s32(void* address, psimd_s32 value) { + *((int32_t*) address) = value[0]; + } + + PSIMD_INTRINSIC void psimd_store1_u32(void* address, psimd_u32 value) { + *((uint32_t*) address) = value[0]; + } + + PSIMD_INTRINSIC void psimd_store1_f32(void* address, psimd_f32 value) { + *((float*) address) = value[0]; + } + + PSIMD_INTRINSIC void psimd_store2_s32(void* address, psimd_s32 value) { + int32_t* address_s32 = (int32_t*) address; + address_s32[0] = value[0]; + address_s32[1] = value[1]; + } + + PSIMD_INTRINSIC void psimd_store2_u32(void* address, psimd_u32 value) { + uint32_t* address_u32 = (uint32_t*) address; + address_u32[0] = value[0]; + address_u32[1] = value[1]; + } + + PSIMD_INTRINSIC void psimd_store2_f32(void* address, psimd_f32 value) { + float* address_f32 = (float*) address; + address_f32[0] = value[0]; + address_f32[1] = value[1]; + } + + PSIMD_INTRINSIC void psimd_store3_s32(void* address, psimd_s32 value) { + int32_t* address_s32 = (int32_t*) address; + address_s32[0] = value[0]; + address_s32[1] = value[1]; + address_s32[2] = value[2]; + } + + PSIMD_INTRINSIC void psimd_store3_u32(void* address, psimd_u32 value) { + uint32_t* address_u32 = (uint32_t*) address; + address_u32[0] = value[0]; + address_u32[1] = value[1]; + address_u32[2] = value[2]; + } + + PSIMD_INTRINSIC void psimd_store3_f32(void* address, psimd_f32 value) { + float* address_f32 = (float*) address; + address_f32[0] = value[0]; + address_f32[1] = value[1]; + address_f32[2] = value[2]; + } + + PSIMD_INTRINSIC void psimd_store4_s32(void* address, psimd_s32 value) { + psimd_store_s32(address, value); + } + + PSIMD_INTRINSIC void psimd_store4_u32(void* address, psimd_u32 value) { + psimd_store_u32(address, value); + } + + PSIMD_INTRINSIC void psimd_store4_f32(void* address, psimd_f32 value) { + psimd_store_f32(address, value); + } + + PSIMD_INTRINSIC void psimd_store_stride_f32(void* address, size_t stride, psimd_f32 value) { + float* address0_f32 = (float*) address; + float* address1_f32 = address0_f32 + stride; + float* address2_f32 = address1_f32 + stride; + float* address3_f32 = address2_f32 + stride; + *address0_f32 = value[0]; + *address1_f32 = value[1]; + *address2_f32 = value[2]; + *address3_f32 = value[3]; + } + + PSIMD_INTRINSIC void psimd_store1_stride_f32(void* address, size_t stride, psimd_f32 value) { + psimd_store1_f32(address, value); + } + + PSIMD_INTRINSIC void psimd_store2_stride_f32(void* address, size_t stride, psimd_f32 value) { + float* address_f32 = (float*) address; + address_f32[0] = value[0]; + address_f32[stride] = value[1]; + } + + PSIMD_INTRINSIC void psimd_store3_stride_f32(void* address, size_t stride, psimd_f32 value) { + float* address0_f32 = (float*) address; + float* address1_f32 = address0_f32 + stride; + float* address2_f32 = address1_f32 + stride; + *address0_f32 = value[0]; + *address1_f32 = value[1]; + *address2_f32 = value[2]; + } + + /* Vector addition */ + PSIMD_INTRINSIC psimd_s8 psimd_add_s8(psimd_s8 a, psimd_s8 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_u8 psimd_add_u8(psimd_u8 a, psimd_u8 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_s16 psimd_add_s16(psimd_s16 a, psimd_s16 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_u16 psimd_add_u16(psimd_u16 a, psimd_u16 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_s32 psimd_add_s32(psimd_s32 a, psimd_s32 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_u32 
psimd_add_u32(psimd_u32 a, psimd_u32 b) { + return a + b; + } + + PSIMD_INTRINSIC psimd_f32 psimd_add_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__) + return (psimd_f32) vaddq_f32((float32x4_t) a, (float32x4_t) b); + #else + return a + b; + #endif + } + + /* Vector subtraction */ + PSIMD_INTRINSIC psimd_s8 psimd_sub_s8(psimd_s8 a, psimd_s8 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_u8 psimd_sub_u8(psimd_u8 a, psimd_u8 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_s16 psimd_sub_s16(psimd_s16 a, psimd_s16 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_u16 psimd_sub_u16(psimd_u16 a, psimd_u16 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_s32 psimd_sub_s32(psimd_s32 a, psimd_s32 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_u32 psimd_sub_u32(psimd_u32 a, psimd_u32 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_f32 psimd_sub_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__) + return (psimd_f32) vsubq_f32((float32x4_t) a, (float32x4_t) b); + #else + return a - b; + #endif + } + + /* Vector multiplication */ + PSIMD_INTRINSIC psimd_s8 psimd_mul_s8(psimd_s8 a, psimd_s8 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u8 psimd_mul_u8(psimd_u8 a, psimd_u8 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_s16 psimd_mul_s16(psimd_s16 a, psimd_s16 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u16 psimd_mul_u16(psimd_u16 a, psimd_u16 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_s32 psimd_mul_s32(psimd_s32 a, psimd_s32 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u32 psimd_mul_u32(psimd_u32 a, psimd_u32 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_f32 psimd_mul_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__) + return (psimd_f32) vmulq_f32((float32x4_t) a, (float32x4_t) b); + #else + return a * b; + #endif + } + + /* Quasi-Fused Multiply-Add */ + PSIMD_INTRINSIC psimd_f32 psimd_qfma_f32(psimd_f32 a, psimd_f32 b, psimd_f32 c) { + #if defined(__aarch64__) || defined(__ARM_NEON__) && defined(__ARM_FEATURE_FMA) + return (psimd_f32) vfmaq_f32((float32x4_t) a, (float32x4_t) b, (float32x4_t) c); + #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA__) + return (psimd_f32) _mm_fmadd_ps((__m128) b, (__m128) c, (__m128) a); + #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA4__) + return (psimd_f32) _mm_macc_ps((__m128) b, (__m128) c, (__m128) a); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) && PSIMD_ENABLE_WASM_QFMA + return (psimd_f32) __builtin_wasm_qfma_f32x4(a, b, c); + #else + return a + b * c; + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_div_f32(psimd_f32 a, psimd_f32 b) { + return a / b; + } + + /* Vector and */ + PSIMD_INTRINSIC psimd_f32 psimd_andmask_f32(psimd_s32 mask, psimd_f32 v) { + return (psimd_f32) (mask & (psimd_s32) v); + } + + /* Vector and-not */ + PSIMD_INTRINSIC psimd_f32 psimd_andnotmask_f32(psimd_s32 mask, psimd_f32 v) { + return (psimd_f32) (~mask & (psimd_s32) v); + } + + /* Vector blend */ + PSIMD_INTRINSIC psimd_s8 psimd_blend_s8(psimd_s8 mask, psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vbslq_s8((uint8x16_t) mask, (int8x16_t) a, (int8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s8) __builtin_wasm_bitselect(a, b, 
mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_s8 mask, psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vbslq_u8((uint8x16_t) mask, (uint8x16_t) a, (uint8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u8) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u8) ((mask & (psimd_s8) a) | (~mask & (psimd_s8) b)); + #endif + } + + PSIMD_INTRINSIC psimd_s16 psimd_blend_s16(psimd_s16 mask, psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vbslq_s16((uint16x8_t) mask, (int16x8_t) a, (int16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s16) __builtin_wasm_bitselect(a, b, mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_s16 mask, psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vbslq_u16((uint16x8_t) mask, (uint16x8_t) a, (uint16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u16) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u16) ((mask & (psimd_s16) a) | (~mask & (psimd_s16) b)); + #endif + } + + PSIMD_INTRINSIC psimd_s32 psimd_blend_s32(psimd_s32 mask, psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vbslq_s32((uint32x4_t) mask, (int32x4_t) a, (int32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s32) __builtin_wasm_bitselect(a, b, mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_s32 mask, psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vbslq_u32((uint32x4_t) mask, (uint32x4_t) a, (uint32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u32) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b)); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_blend_f32(psimd_s32 mask, psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vbslq_f32((uint32x4_t) mask, (float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_f32) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_f32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b)); + #endif + } + + /* Vector blend on sign */ + PSIMD_INTRINSIC psimd_s8 psimd_signblend_s8(psimd_s8 x, psimd_s8 a, psimd_s8 b) { + return psimd_blend_s8(x >> psimd_splat_s8(7), a, b); + } + + PSIMD_INTRINSIC psimd_u8 psimd_signblend_u8(psimd_s8 x, psimd_u8 a, psimd_u8 b) { + return psimd_blend_u8((x >> psimd_splat_s8(7)), a, b); + } + + PSIMD_INTRINSIC psimd_s16 psimd_signblend_s16(psimd_s16 x, psimd_s16 a, psimd_s16 b) { + return psimd_blend_s16(x >> psimd_splat_s16(15), a, b); + } + + PSIMD_INTRINSIC psimd_u16 psimd_signblend_u16(psimd_s16 x, psimd_u16 a, psimd_u16 b) { + return psimd_blend_u16((x >> psimd_splat_s16(15)), a, b); + } + + PSIMD_INTRINSIC psimd_s32 psimd_signblend_s32(psimd_s32 x, psimd_s32 a, psimd_s32 b) { + return psimd_blend_s32(x >> psimd_splat_s32(31), a, b); + } + + PSIMD_INTRINSIC psimd_u32 psimd_signblend_u32(psimd_s32 x, psimd_u32 
a, psimd_u32 b) { + return psimd_blend_u32((x >> psimd_splat_s32(31)), a, b); + } + + PSIMD_INTRINSIC psimd_f32 psimd_signblend_f32(psimd_f32 x, psimd_f32 a, psimd_f32 b) { + const psimd_s32 mask = (psimd_s32) x >> psimd_splat_s32(31); + return psimd_blend_f32(mask, a, b); + } + + /* Vector absolute value */ + PSIMD_INTRINSIC psimd_f32 psimd_abs_f32(psimd_f32 v) { + const psimd_s32 mask = (psimd_s32) psimd_splat_f32(-0.0f); + return (psimd_f32) ((psimd_s32) v & ~mask); + } + + /* Vector negation */ + PSIMD_INTRINSIC psimd_f32 psimd_neg_f32(psimd_f32 v) { + const psimd_s32 mask = (psimd_s32) psimd_splat_f32(-0.0f); + return (psimd_f32) ((psimd_s32) v ^ mask); + } + + /* Vector maximum */ + PSIMD_INTRINSIC psimd_s8 psimd_max_s8(psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vmaxq_s8((int8x16_t) a, (int8x16_t) b); + #else + return psimd_blend_s8(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_max_u8(psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vmaxq_u8((uint8x16_t) a, (uint8x16_t) b); + #else + return psimd_blend_u8(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s16 psimd_max_s16(psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vmaxq_s16((int16x8_t) a, (int16x8_t) b); + #else + return psimd_blend_s16(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_max_u16(psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vmaxq_u16((uint16x8_t) a, (uint16x8_t) b); + #else + return psimd_blend_u16(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s32 psimd_max_s32(psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vmaxq_s32((int32x4_t) a, (int32x4_t) b); + #else + return psimd_blend_s32(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_max_u32(psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vmaxq_u32((uint32x4_t) a, (uint32x4_t) b); + #else + return psimd_blend_u32(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_max_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vmaxq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_max_f32x4(a, b); + #else + return psimd_blend_f32(a > b, a, b); + #endif + } + + /* Vector minimum */ + PSIMD_INTRINSIC psimd_s8 psimd_min_s8(psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vminq_s8((int8x16_t) a, (int8x16_t) b); + #else + return psimd_blend_s8(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_min_u8(psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vminq_u8((uint8x16_t) a, (uint8x16_t) b); + #else + return psimd_blend_u8(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s16 psimd_min_s16(psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vminq_s16((int16x8_t) a, (int16x8_t) b); + #else + return psimd_blend_s16(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_min_u16(psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vminq_u16((uint16x8_t) a, (uint16x8_t) b); + #else + return psimd_blend_u16(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s32 
psimd_min_s32(psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vminq_s32((int32x4_t) a, (int32x4_t) b); + #else + return psimd_blend_s32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_min_u32(psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vminq_u32((uint32x4_t) a, (uint32x4_t) b); + #else + return psimd_blend_u32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_min_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vminq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_min_f32x4(a, b); + #else + return psimd_blend_f32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_cvt_s32_f32(psimd_s32 v) { + #if defined(__clang__) + return __builtin_convertvector(v, psimd_f32); + #elif defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vcvtq_f32_s32((int32x4_t) v); + #elif defined(__SSE2__) + return (psimd_f32) _mm_cvtepi32_ps((__m128i) v); + #else + return (psimd_f32) { (float) v[0], (float) v[1], (float) v[2], (float) v[3] }; + #endif + } + + /* Broadcast vector element */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_f32 psimd_splat0_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 0, 0, 0, 0); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat1_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 1, 1, 1, 1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat2_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 2, 2, 2, 2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat3_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 3, 3, 3, 3); + } + #else + PSIMD_INTRINSIC psimd_f32 psimd_splat0_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 0, 0, 0, 0 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat1_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 1, 1, 1, 1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat2_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 2, 2, 2, 2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat3_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 3, 3, 3 }); + } + #endif + + /* Reversal of vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s8 psimd_reverse_s8(psimd_s8 v) { + return __builtin_shufflevector(v, v, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u8 psimd_reverse_u8(psimd_u8 v) { + return __builtin_shufflevector(v, v, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_s16 psimd_reverse_s16(psimd_s16 v) { + return __builtin_shufflevector(v, v, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u16 psimd_reverse_u16(psimd_u16 v) { + return __builtin_shufflevector(v, v, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_s32 psimd_reverse_s32(psimd_s32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u32 psimd_reverse_u32(psimd_u32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_f32 psimd_reverse_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + #else + PSIMD_INTRINSIC psimd_s8 psimd_reverse_s8(psimd_s8 v) { + return __builtin_shuffle(v, (psimd_s8) { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_reverse_u8(psimd_u8 v) { + return __builtin_shuffle(v, 
(psimd_s8) { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_reverse_s16(psimd_s16 v) { + return __builtin_shuffle(v, (psimd_s16) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_reverse_u16(psimd_u16 v) { + return __builtin_shuffle(v, (psimd_s16) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_reverse_s32(psimd_s32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_reverse_u32(psimd_u32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_reverse_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + #endif + + /* Interleaving of vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s16 psimd_interleave_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); + } + + PSIMD_INTRINSIC psimd_s16 psimd_interleave_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s16 psimd_interleave_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_interleave_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 
4+2, 3, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 }); + } + #endif + + /* Concatenation of low/high vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s16 psimd_concat_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s16 psimd_concat_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + #endif + + /* Concatenation of even/odd vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s8 psimd_concat_even_s8(psimd_s8 a, 
psimd_s8 b) { + return __builtin_shufflevector(a, b, + 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14); + } + + PSIMD_INTRINSIC psimd_s8 psimd_concat_odd_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shufflevector(a, b, + 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_even_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shufflevector(a, b, + 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_odd_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shufflevector(a, b, + 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_even_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_odd_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_even_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_odd_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_even_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_odd_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_even_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_odd_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_even_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_odd_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s8 psimd_concat_even_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14 }); + } + + PSIMD_INTRINSIC psimd_s8 psimd_concat_odd_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_even_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_odd_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_even_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_odd_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_even_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6 }); + } + + PSIMD_INTRINSIC 
psimd_u16 psimd_concat_odd_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_even_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_odd_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_even_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_odd_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_even_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_odd_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + #endif + + /* Vector reduce */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shufflevector(v, v, 2, 3, 0, 1); + return temp + __builtin_shufflevector(temp, temp, 1, 0, 3, 2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shufflevector(v, v, 2, 3, 0, 1)); + return psimd_max_f32(temp, __builtin_shufflevector(temp, temp, 1, 0, 3, 2)); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shufflevector(v, v, 2, 3, 0, 1)); + return psimd_min_f32(temp, __builtin_shufflevector(temp, temp, 1, 0, 3, 2)); + } + + PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shufflevector(v, v, 2, 3, -1, -1); + const psimd_f32 result = temp + __builtin_shufflevector(temp, temp, 1, -1, -1, -1); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shufflevector(v, v, 2, 3, -1, -1)); + const psimd_f32 result = psimd_max_f32(temp, __builtin_shufflevector(temp, temp, 1, -1, -1, -1)); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shufflevector(v, v, 2, 3, -1, -1)); + const psimd_f32 result = psimd_min_f32(temp, __builtin_shufflevector(temp, temp, 1, -1, -1, -1)); + return result[0]; + } + #else + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 }); + return temp + __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 })); + return psimd_max_f32(temp, __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 })); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 })); + return psimd_min_f32(temp, __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 })); + } + + PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) { + const psimd_f32 result = psimd_allreduce_sum_f32(v); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) { + const psimd_f32 result = 
psimd_allreduce_max_f32(v); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) { + const psimd_f32 result = psimd_allreduce_min_f32(v); + return result[0]; + } + #endif +#endif + +#endif /* PSIMD_H */ diff --git a/.venv/lib/python3.11/site-packages/torch/include/pthreadpool.h b/.venv/lib/python3.11/site-packages/torch/include/pthreadpool.h new file mode 100644 index 0000000000000000000000000000000000000000..953ccc4cc24070aa4897fabc081cba466e34170a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torch/include/pthreadpool.h @@ -0,0 +1,2555 @@ +#ifndef PTHREADPOOL_H_ +#define PTHREADPOOL_H_ + +#include +#include + +typedef struct pthreadpool* pthreadpool_t; + +typedef void (*pthreadpool_task_1d_t)(void*, size_t); +typedef void (*pthreadpool_task_1d_with_thread_t)(void*, size_t, size_t); +typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t); +typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t); +typedef void (*pthreadpool_task_2d_with_thread_t)(void*, size_t, size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void*, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); + +typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); +typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void*, uint32_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); + +typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)(void*, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); + + +/** + * Disable support for denormalized numbers to the maximum extent possible for + * the duration of the computation. 
+ *
+ * Handling denormalized floating-point numbers is often implemented in
+ * microcode, and incurs significant performance degradation. This hint
+ * instructs the thread pool to disable support for denormalized numbers before
+ * running the computation by manipulating architecture-specific control
+ * registers, and restore the initial value of control registers after the
+ * computation is complete. The thread pool temporarily disables denormalized
+ * numbers on all threads involved in the computation (i.e. the caller threads,
+ * and potentially worker threads).
+ *
+ * Disabling denormalized numbers may have a small negative effect on results'
+ * accuracy. As various architectures differ in capabilities to control
+ * processing of denormalized numbers, using this flag may also hurt results'
+ * reproducibility across different instruction set architectures.
+ */
+#define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
+
+/**
+ * Yield worker threads to the system scheduler after the operation is finished.
+ *
+ * Force workers to use kernel wait (instead of active spin-wait by default) for
+ * new commands after this command is processed. This flag affects only the
+ * immediate next operation on this thread pool. To make the thread pool always
+ * use kernel wait, pass this flag to all parallelization functions.
+ */
+#define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Create a thread pool with the specified number of threads.
+ *
+ * @param threads_count the number of threads in the thread pool.
+ *    A value of 0 has special interpretation: it creates a thread pool with as
+ *    many threads as there are logical processors in the system.
+ *
+ * @returns A pointer to an opaque thread pool object if the call is
+ *    successful, or a NULL pointer if the call failed.
+ */
+pthreadpool_t pthreadpool_create(size_t threads_count);
+
+/**
+ * Query the number of threads in a thread pool.
+ *
+ * @param threadpool the thread pool to query.
+ *
+ * @returns The number of threads in the thread pool.
+ */
+size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
+
+/**
+ * Process items on a 1D grid.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ *   for (size_t i = 0; i < range; i++)
+ *     function(context, i);
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ *    calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ *    is NULL, all items are processed serially on the calling thread.
+ * @param function the function to call for each item.
+ * @param context the first argument passed to the specified function.
+ * @param range the number of items on the 1D grid to process. The
+ *    specified function will be called once for each item.
+ * @param flags a bitwise combination of zero or more optional flags
+ *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_1d(
+    pthreadpool_t threadpool,
+    pthreadpool_task_1d_t function,
+    void* context,
+    size_t range,
+    uint32_t flags);
+
+/**
+ * Process items on a 1D grid passing along the current thread id.
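+ *
+ * (Editorial sketch, not part of the upstream pthreadpool.h header.) The
+ * thread id makes per-thread scratch storage easy: give every worker its own
+ * accumulator slot so no synchronisation is needed, and combine the slots
+ * after the call returns. The names sum_context, add_item and kMaxThreads
+ * below are hypothetical illustrations, not pthreadpool API:
+ *
+ *   struct sum_context { const float* data; double partial[kMaxThreads]; };
+ *
+ *   static void add_item(void* ctx, size_t thread_index, size_t i) {
+ *     struct sum_context* c = (struct sum_context*) ctx;
+ *     c->partial[thread_index] += c->data[i];  // one slot per thread, no locking
+ *   }
+ *
+ *   pthreadpool_parallelize_1d_with_thread(threadpool, add_item, &ctx, n, 0);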
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, thread_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_1d_with_thread_t function, + void* context, + size_t range, + uint32_t flags); + +/** + * Process items on a 1D grid using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range; i++) + * function(context, uarch_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range the number of items on the 1D grid to process. + * The specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_1d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range, + uint32_t flags); + +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * function(context, i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. 
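+ *
+ * (Editorial sketch, not part of the upstream pthreadpool.h header.) A tiled
+ * task receives the start index of its tile and the actual tile length, which
+ * equals `tile` for every tile except possibly the last one. The names
+ * scale_context and scale_tile below are hypothetical:
+ *
+ *   static void scale_tile(void* ctx, size_t start, size_t count) {
+ *     float* v = ((struct scale_context*) ctx)->values;
+ *     for (size_t i = start; i < start + count; i++)
+ *       v[i] *= 0.5f;  // process one contiguous tile of at most `tile` items
+ *   }
+ *
+ *   pthreadpool_parallelize_1d_tile_1d(threadpool, scale_tile, &ctx, n, 64, 0);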
+ * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_tile_1d( + pthreadpool_t threadpool, + pthreadpool_task_1d_tile_1d_t function, + void* context, + size_t range, + size_t tile, + uint32_t flags); + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d( + pthreadpool_t threadpool, + pthreadpool_task_2d_t function, + void* context, + size_t range_i, + size_t range_j, + uint32_t flags); + +/** + * Process items on a 2D grid passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, thread_index, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_2d_with_thread_t function, + void* context, + size_t range_i, + size_t range_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. 
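+ *
+ * (Editorial sketch, not part of the upstream pthreadpool.h header.) A common
+ * pattern is an image-style loop: i selects a row while the columns of that
+ * row are handed out in tiles of at most tile_j items, so the task sees
+ * (context, i, j, tile_size). The name row_op below is hypothetical:
+ *
+ *   static void row_op(void* ctx, size_t i, size_t j, size_t count) {
+ *     // process columns [j, j + count) of row i
+ *   }
+ *
+ *   pthreadpool_parallelize_2d_tile_1d(threadpool, row_op, &ctx,
+ *                                      height, width, 128, 0);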
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, thread_index, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool
+ *    is NULL, all items are processed serially on the calling thread.
+ * @param function the function to call for each tile.
+ * @param context the first argument passed to the specified function.
+ * @param range_i the number of items to process along the first dimension
+ *    of the 2D grid.
+ * @param range_j the number of items to process along the second dimension
+ *    of the 2D grid.
+ * @param tile_i the maximum number of items along the first dimension of
+ *    the 2D grid to process in one function call.
+ * @param tile_j the maximum number of items along the second dimension of
+ *    the 2D grid to process in one function call.
+ * @param flags a bitwise combination of zero or more optional flags
+ *    (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_2d_tile_2d(
+    pthreadpool_t threadpool,
+    pthreadpool_task_2d_tile_2d_t function,
+    void* context,
+    size_t range_i,
+    size_t range_j,
+    size_t tile_i,
+    size_t tile_j,
+    uint32_t flags);
+
+/**
+ * Process items on a 2D grid with the specified maximum tile size along each
+ * grid dimension using a microarchitecture-aware task function.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ *   uint32_t uarch_index = cpuinfo_initialize() ?
+ *       cpuinfo_get_current_uarch_index() : default_uarch_index;
+ *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
+ *   for (size_t i = 0; i < range_i; i += tile_i)
+ *     for (size_t j = 0; j < range_j; j += tile_j)
+ *       function(context, uarch_index, i, j,
+ *         min(range_i - i, tile_i), min(range_j - j, tile_j));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ *    calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If
+ *    threadpool is NULL, all items are processed serially on the calling
+ *    thread.
+ * @param function the function to call for each tile.
+ * @param context the first argument passed to the specified function.
+ * @param default_uarch_index the microarchitecture index to use when
+ *    pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
+ *    or index returned by cpuinfo_get_current_uarch_index() exceeds the
+ *    max_uarch_index value.
+ * @param max_uarch_index the maximum microarchitecture index expected
+ *    by the specified function. If the index returned by
+ *    cpuinfo_get_current_uarch_index() exceeds this value,
+ *    default_uarch_index will be used instead.
+ *    default_uarch_index can exceed max_uarch_index.
+ * @param range_i the number of items to process along the first
+ *    dimension of the 2D grid.
+ * @param range_j the number of items to process along the second
+ *    dimension of the 2D grid.
+ * @param tile_i the maximum number of items along the first
+ *    dimension of the 2D grid to process in one function call.
+ * @param tile_j the maximum number of items along the second
+ *    dimension of the 2D grid to process in one function call.
+ * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t tile_i, + size_t tile_j, + uint32_t flags); + +/** + * Process items on a 3D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * function(context, i, j, k); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d( + pthreadpool_t threadpool, + pthreadpool_task_3d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension and passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, thread_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_thread_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. 
+ * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, thread_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_2d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_j, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. 
+ * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_j the maximum number of items along the second + * dimension of the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_j, + size_t tile_k, + uint32_t flags); + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * function(context, i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d( + pthreadpool_t threadpool, + pthreadpool_task_4d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. 
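+ *
+ * (Editorial sketch, not part of the upstream pthreadpool.h header.) The 4D
+ * variants map naturally onto NCHW tensor loops: batch, channel and row become
+ * grid dimensions while the innermost width dimension is tiled. The name
+ * nchw_op below is hypothetical:
+ *
+ *   static void nchw_op(void* ctx, size_t n, size_t c, size_t y,
+ *                       size_t x, size_t count) {
+ *     // process elements [x, x + count) of row y, channel c, image n
+ *   }
+ *
+ *   pthreadpool_parallelize_4d_tile_1d(threadpool, nchw_op, &ctx,
+ *                                      batch, channels, height, width, 64, 0);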
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_1d( + pthreadpool_t threadpool, + pthreadpool_task_4d_tile_1d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d( + pthreadpool_t threadpool, + pthreadpool_task_4d_tile_2d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_k, + size_t tile_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, uarch_index, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 4D grid. + * @param range_j the number of items to process along the second + * dimension of the 4D grid. + * @param range_k the number of items to process along the third + * dimension of the 4D grid. + * @param range_l the number of items to process along the fourth + * dimension of the 4D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth + * dimension of the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d_with_uarch( + pthreadpool_t threadpool, + pthreadpool_task_4d_tile_2d_with_id_t function, + void* context, + uint32_t default_uarch_index, + uint32_t max_uarch_index, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_k, + size_t tile_l, + uint32_t flags); + +/** + * Process items on a 5D grid. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * function(context, i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d( + pthreadpool_t threadpool, + pthreadpool_task_5d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_1d( + pthreadpool_t threadpool, + pthreadpool_task_5d_tile_1d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t tile_m, + uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one function call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_2d( + pthreadpool_t threadpool, + pthreadpool_task_5d_tile_2d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t tile_l, + size_t tile_m, + uint32_t flags); + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * function(context, i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. 
+ * @param range_i the number of items to process along the first dimension
+ * of the 6D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 6D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 6D grid.
+ * @param range_l the number of items to process along the fourth dimension
+ * of the 6D grid.
+ * @param range_m the number of items to process along the fifth dimension
+ * of the 6D grid.
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_6d(
+ pthreadpool_t threadpool,
+ pthreadpool_task_6d_t function,
+ void* context,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t range_l,
+ size_t range_m,
+ size_t range_n,
+ uint32_t flags);
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last grid dimension.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range_i; i++)
+ * for (size_t j = 0; j < range_j; j++)
+ * for (size_t k = 0; k < range_k; k++)
+ * for (size_t l = 0; l < range_l; l++)
+ * for (size_t m = 0; m < range_m; m++)
+ * for (size_t n = 0; n < range_n; n += tile_n)
+ * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ * calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param function the function to call for each tile.
+ * @param context the first argument passed to the specified function.
+ * @param range_i the number of items to process along the first dimension
+ * of the 6D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 6D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 6D grid.
+ * @param range_l the number of items to process along the fourth dimension
+ * of the 6D grid.
+ * @param range_m the number of items to process along the fifth dimension
+ * of the 6D grid.
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param tile_n the maximum number of items along the sixth dimension of
+ * the 6D grid to process in one function call.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_6d_tile_1d(
+ pthreadpool_t threadpool,
+ pthreadpool_task_6d_tile_1d_t function,
+ void* context,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t range_l,
+ size_t range_m,
+ size_t range_n,
+ size_t tile_n,
+ uint32_t flags);
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last two grid dimensions.
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * function(context, i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one function call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_6d_tile_2d( + pthreadpool_t threadpool, + pthreadpool_task_6d_tile_2d_t function, + void* context, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t range_n, + size_t tile_m, + size_t tile_n, + uint32_t flags); + +/** + * Terminates threads in the thread pool and releases associated resources. + * + * @warning Accessing the thread pool after a call to this function constitutes + * undefined behaviour and may cause data corruption. + * + * @param[in,out] threadpool The thread pool to destroy. + */ +void pthreadpool_destroy(pthreadpool_t threadpool); + +#ifndef PTHREADPOOL_NO_DEPRECATED_API + +/* Legacy API for compatibility with pre-existing users (e.g. 
NNPACK) */
+#if defined(__GNUC__)
+ #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
+#else
+ #define PTHREADPOOL_DEPRECATED
+#endif
+
+typedef void (*pthreadpool_function_1d_t)(void*, size_t);
+typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t);
+typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t);
+typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t);
+typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
+typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
+
+void pthreadpool_compute_1d(
+ pthreadpool_t threadpool,
+ pthreadpool_function_1d_t function,
+ void* argument,
+ size_t range) PTHREADPOOL_DEPRECATED;
+
+void pthreadpool_compute_1d_tiled(
+ pthreadpool_t threadpool,
+ pthreadpool_function_1d_tiled_t function,
+ void* argument,
+ size_t range,
+ size_t tile) PTHREADPOOL_DEPRECATED;
+
+void pthreadpool_compute_2d(
+ pthreadpool_t threadpool,
+ pthreadpool_function_2d_t function,
+ void* argument,
+ size_t range_i,
+ size_t range_j) PTHREADPOOL_DEPRECATED;
+
+void pthreadpool_compute_2d_tiled(
+ pthreadpool_t threadpool,
+ pthreadpool_function_2d_tiled_t function,
+ void* argument,
+ size_t range_i,
+ size_t range_j,
+ size_t tile_i,
+ size_t tile_j) PTHREADPOOL_DEPRECATED;
+
+void pthreadpool_compute_3d_tiled(
+ pthreadpool_t threadpool,
+ pthreadpool_function_3d_tiled_t function,
+ void* argument,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t tile_i,
+ size_t tile_j,
+ size_t tile_k) PTHREADPOOL_DEPRECATED;
+
+void pthreadpool_compute_4d_tiled(
+ pthreadpool_t threadpool,
+ pthreadpool_function_4d_tiled_t function,
+ void* argument,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t range_l,
+ size_t tile_i,
+ size_t tile_j,
+ size_t tile_k,
+ size_t tile_l) PTHREADPOOL_DEPRECATED;
+
+#endif /* PTHREADPOOL_NO_DEPRECATED_API */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#ifdef __cplusplus
+
+namespace libpthreadpool {
+namespace detail {
+namespace {
+
+template <class T>
+void call_wrapper_1d(void* arg, size_t i) {
+ (*static_cast<const T*>(arg))(i);
+}
+
+template <class T>
+void call_wrapper_1d_tile_1d(void* arg, size_t range_i, size_t tile_i) {
+ (*static_cast<const T*>(arg))(range_i, tile_i);
+}
+
+template <class T>
+void call_wrapper_2d(void* functor, size_t i, size_t j) {
+ (*static_cast<const T*>(functor))(i, j);
+}
+
+template <class T>
+void call_wrapper_2d_tile_1d(void* functor,
+ size_t i, size_t range_j, size_t tile_j)
+{
+ (*static_cast<const T*>(functor))(i, range_j, tile_j);
+}
+
+template <class T>
+void call_wrapper_2d_tile_2d(void* functor,
+ size_t range_i, size_t range_j,
+ size_t tile_i, size_t tile_j)
+{
+ (*static_cast<const T*>(functor))(range_i, range_j, tile_i, tile_j);
+}
+
+template <class T>
+void call_wrapper_3d(void* functor, size_t i, size_t j, size_t k) {
+ (*static_cast<const T*>(functor))(i, j, k);
+}
+
+template <class T>
+void call_wrapper_3d_tile_1d(void* functor,
+ size_t i, size_t j, size_t range_k,
+ size_t tile_k)
+{
+ (*static_cast<const T*>(functor))(i, j, range_k, tile_k);
+}
+
+template <class T>
+void call_wrapper_3d_tile_2d(void* functor,
+ size_t i, size_t range_j, size_t range_k,
+ size_t tile_j, size_t tile_k)
+{
+ (*static_cast<const T*>(functor))(i, range_j, range_k, tile_j, tile_k);
+}
+
+template <class T>
+void call_wrapper_4d(void* functor, size_t i, size_t j, size_t k, size_t l) {
+ (*static_cast<const T*>(functor))(i, j, k, l);
+}
+
+template <class T>
+void call_wrapper_4d_tile_1d(void* functor,
+ size_t i, size_t j, size_t k, size_t range_l,
+ size_t tile_l)
+{
+ (*static_cast<const T*>(functor))(i, j, k, range_l, tile_l);
+}
+
+template <class T>
+void call_wrapper_4d_tile_2d(void* functor,
+ size_t i, size_t j, size_t range_k, size_t range_l,
+ size_t tile_k, size_t tile_l)
+{
+ (*static_cast<const T*>(functor))(i, j, range_k, range_l, tile_k, tile_l);
+}
+
+template <class T>
+void call_wrapper_5d(void* functor, size_t i, size_t j, size_t k, size_t l, size_t m) {
+ (*static_cast<const T*>(functor))(i, j, k, l, m);
+}
+
+template <class T>
+void call_wrapper_5d_tile_1d(void* functor,
+ size_t i, size_t j, size_t k, size_t l, size_t range_m,
+ size_t tile_m)
+{
+ (*static_cast<const T*>(functor))(i, j, k, l, range_m, tile_m);
+}
+
+template <class T>
+void call_wrapper_5d_tile_2d(void* functor,
+ size_t i, size_t j, size_t k, size_t range_l, size_t range_m,
+ size_t tile_l, size_t tile_m)
+{
+ (*static_cast<const T*>(functor))(i, j, k, range_l, range_m, tile_l, tile_m);
+}
+
+template <class T>
+void call_wrapper_6d(void* functor, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
+ (*static_cast<const T*>(functor))(i, j, k, l, m, n);
+}
+
+template <class T>
+void call_wrapper_6d_tile_1d(void* functor,
+ size_t i, size_t j, size_t k, size_t l, size_t m, size_t range_n,
+ size_t tile_n)
+{
+ (*static_cast<const T*>(functor))(i, j, k, l, m, range_n, tile_n);
+}
+
+template <class T>
+void call_wrapper_6d_tile_2d(void* functor,
+ size_t i, size_t j, size_t k, size_t l, size_t range_m, size_t range_n,
+ size_t tile_m, size_t tile_n)
+{
+ (*static_cast<const T*>(functor))(i, j, k, l, range_m, range_n, tile_m, tile_n);
+}
+
+} /* namespace */
+} /* namespace detail */
+} /* namespace libpthreadpool */
+
+/**
+ * Process items on a 1D grid.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range; i++)
+ * functor(i);
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ * calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param functor the functor to call for each item.
+ * @param range the number of items on the 1D grid to process. The
+ * specified functor will be called once for each item.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+template <class T>
+inline void pthreadpool_parallelize_1d(
+ pthreadpool_t threadpool,
+ const T& functor,
+ size_t range,
+ uint32_t flags = 0)
+{
+ pthreadpool_parallelize_1d(
+ threadpool,
+ &libpthreadpool::detail::call_wrapper_1d<T>,
+ const_cast<void*>(static_cast<const void*>(&functor)),
+ range,
+ flags);
+}
+
+/**
+ * Process items on a 1D grid with specified maximum tile size.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range; i += tile)
+ * functor(i, min(range - i, tile));
+ *
+ * When the call returns, all items have been processed and the thread pool is
+ * ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool,
+ * the calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param functor the functor to call for each tile.
+ * @param range the number of items on the 1D grid to process.
+ * @param tile the maximum number of items on the 1D grid to process in
+ * one functor call.
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_1d_tile_1d( + pthreadpool_t threadpool, + const T& functor, + size_t range, + size_t tile, + uint32_t flags = 0) +{ + pthreadpool_parallelize_1d_tile_1d( + threadpool, + &libpthreadpool::detail::call_wrapper_1d_tile_1d, + const_cast(static_cast(&functor)), + range, + tile, + flags); +} + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * functor(i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each item. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + uint32_t flags = 0) +{ + pthreadpool_parallelize_2d( + threadpool, + &libpthreadpool::detail::call_wrapper_2d, + const_cast(static_cast(&functor)), + range_i, + range_j, + flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * functor(i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d_tile_1d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t tile_j, + uint32_t flags = 0) +{ + pthreadpool_parallelize_2d_tile_1d( + threadpool, + &libpthreadpool::detail::call_wrapper_2d_tile_1d, + const_cast(static_cast(&functor)), + range_i, + range_j, + tile_j, + flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. 
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range_i; i += tile_i)
+ * for (size_t j = 0; j < range_j; j += tile_j)
+ * functor(i, j,
+ * min(range_i - i, tile_i), min(range_j - j, tile_j));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ * calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param functor the functor to call for each tile.
+ * @param range_i the number of items to process along the first dimension
+ * of the 2D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 2D grid.
+ * @param tile_i the maximum number of items along the first dimension of
+ * the 2D grid to process in one functor call.
+ * @param tile_j the maximum number of items along the second dimension of
+ * the 2D grid to process in one functor call.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+template <class T>
+inline void pthreadpool_parallelize_2d_tile_2d(
+ pthreadpool_t threadpool,
+ const T& functor,
+ size_t range_i,
+ size_t range_j,
+ size_t tile_i,
+ size_t tile_j,
+ uint32_t flags = 0)
+{
+ pthreadpool_parallelize_2d_tile_2d(
+ threadpool,
+ &libpthreadpool::detail::call_wrapper_2d_tile_2d<T>,
+ const_cast<void*>(static_cast<const void*>(&functor)),
+ range_i,
+ range_j,
+ tile_i,
+ tile_j,
+ flags);
+}
+
+/**
+ * Process items on a 3D grid.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range_i; i++)
+ * for (size_t j = 0; j < range_j; j++)
+ * for (size_t k = 0; k < range_k; k++)
+ * functor(i, j, k);
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ * calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param functor the functor to call for each tile.
+ * @param range_i the number of items to process along the first dimension
+ * of the 3D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 3D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 3D grid.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+template <class T>
+inline void pthreadpool_parallelize_3d(
+ pthreadpool_t threadpool,
+ const T& functor,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ uint32_t flags = 0)
+{
+ pthreadpool_parallelize_3d(
+ threadpool,
+ &libpthreadpool::detail::call_wrapper_3d<T>,
+ const_cast<void*>(static_cast<const void*>(&functor)),
+ range_i,
+ range_j,
+ range_k,
+ flags);
+}
+
+/**
+ * Process items on a 3D grid with the specified maximum tile size along the
+ * last grid dimension.
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_1d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_k, + uint32_t flags = 0) +{ + pthreadpool_parallelize_3d_tile_1d( + threadpool, + &libpthreadpool::detail::call_wrapper_3d_tile_1d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + tile_k, + flags); +} + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one functor call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_2d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t tile_j, + size_t tile_k, + uint32_t flags = 0) +{ + pthreadpool_parallelize_3d_tile_2d( + threadpool, + &libpthreadpool::detail::call_wrapper_3d_tile_2d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + tile_j, + tile_k, + flags); +} + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * functor(i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + uint32_t flags = 0) +{ + pthreadpool_parallelize_4d( + threadpool, + &libpthreadpool::detail::call_wrapper_4d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. 
+ * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_1d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_l, + uint32_t flags = 0) +{ + pthreadpool_parallelize_4d_tile_1d( + threadpool, + &libpthreadpool::detail::call_wrapper_4d_tile_1d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + tile_l, + flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one functor call. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_2d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t tile_k, + size_t tile_l, + uint32_t flags = 0) +{ + pthreadpool_parallelize_4d_tile_2d( + threadpool, + &libpthreadpool::detail::call_wrapper_4d_tile_2d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + tile_k, + tile_l, + flags); +} + +/** + * Process items on a 5D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * functor(i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + uint32_t flags = 0) +{ + pthreadpool_parallelize_5d( + threadpool, + &libpthreadpool::detail::call_wrapper_5d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + range_m, + flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_1d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t tile_m, + uint32_t flags = 0) +{ + pthreadpool_parallelize_5d_tile_1d( + threadpool, + &libpthreadpool::detail::call_wrapper_5d_tile_1d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + range_m, + tile_m, + flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one functor call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_2d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t tile_l, + size_t tile_m, + uint32_t flags = 0) +{ + pthreadpool_parallelize_5d_tile_2d( + threadpool, + &libpthreadpool::detail::call_wrapper_5d_tile_2d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + range_m, + tile_l, + tile_m, + flags); +} + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * functor(i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. 
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+template <class T>
+inline void pthreadpool_parallelize_6d(
+ pthreadpool_t threadpool,
+ const T& functor,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t range_l,
+ size_t range_m,
+ size_t range_n,
+ uint32_t flags = 0)
+{
+ pthreadpool_parallelize_6d(
+ threadpool,
+ &libpthreadpool::detail::call_wrapper_6d<T>,
+ const_cast<void*>(static_cast<const void*>(&functor)),
+ range_i,
+ range_j,
+ range_k,
+ range_l,
+ range_m,
+ range_n,
+ flags);
+}
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last grid dimension.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ * for (size_t i = 0; i < range_i; i++)
+ * for (size_t j = 0; j < range_j; j++)
+ * for (size_t k = 0; k < range_k; k++)
+ * for (size_t l = 0; l < range_l; l++)
+ * for (size_t m = 0; m < range_m; m++)
+ * for (size_t n = 0; n < range_n; n += tile_n)
+ * functor(i, j, k, l, m, n, min(range_n - n, tile_n));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ *
+ * @note If multiple threads call this function with the same thread pool, the
+ * calls are serialized.
+ *
+ * @param threadpool the thread pool to use for parallelisation. If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param functor the functor to call for each tile.
+ * @param range_i the number of items to process along the first dimension
+ * of the 6D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 6D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 6D grid.
+ * @param range_l the number of items to process along the fourth dimension
+ * of the 6D grid.
+ * @param range_m the number of items to process along the fifth dimension
+ * of the 6D grid.
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param tile_n the maximum number of items along the sixth dimension of
+ * the 6D grid to process in one functor call.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+template <class T>
+inline void pthreadpool_parallelize_6d_tile_1d(
+ pthreadpool_t threadpool,
+ const T& functor,
+ size_t range_i,
+ size_t range_j,
+ size_t range_k,
+ size_t range_l,
+ size_t range_m,
+ size_t range_n,
+ size_t tile_n,
+ uint32_t flags = 0)
+{
+ pthreadpool_parallelize_6d_tile_1d(
+ threadpool,
+ &libpthreadpool::detail::call_wrapper_6d_tile_1d<T>,
+ const_cast<void*>(static_cast<const void*>(&functor)),
+ range_i,
+ range_j,
+ range_k,
+ range_l,
+ range_m,
+ range_n,
+ tile_n,
+ flags);
+}
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last two grid dimensions.
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * functor(i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one functor call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d_tile_2d( + pthreadpool_t threadpool, + const T& functor, + size_t range_i, + size_t range_j, + size_t range_k, + size_t range_l, + size_t range_m, + size_t range_n, + size_t tile_m, + size_t tile_n, + uint32_t flags = 0) +{ + pthreadpool_parallelize_6d_tile_2d( + threadpool, + &libpthreadpool::detail::call_wrapper_6d_tile_2d, + const_cast(static_cast(&functor)), + range_i, + range_j, + range_k, + range_l, + range_m, + range_n, + tile_m, + tile_n, + flags); +} + +#endif /* __cplusplus */ + +#endif /* PTHREADPOOL_H_ */
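For reference, a minimal usage sketch of the API this diff adds; it is not part of the header itself. It assumes pthreadpool_create() (declared earlier in this header) accepts a threads_count of 0 to mean "use all available logical processors", and it uses the C++ functor overload of pthreadpool_parallelize_1d_tile_1d declared above; names such as pool and data are illustrative.

#include <pthreadpool.h>
#include <vector>

int main() {
  std::vector<float> data(1024, 1.0f);

  /* A NULL pool is also legal: the work then runs serially on the calling thread. */
  pthreadpool_t pool = pthreadpool_create(0);

  /* Each functor call receives a start index and a tile of at most 64 items. */
  pthreadpool_parallelize_1d_tile_1d(
      pool,
      [&](size_t start, size_t count) {
        for (size_t i = start; i < start + count; i++) {
          data[i] *= 2.0f;
        }
      },
      data.size(), /* range */
      64,          /* tile  */
      0 /* flags */);

  pthreadpool_destroy(pool);
  return 0;
}

The call is synchronous, so capturing local state by reference in the lambda is safe: the functor outlives every worker invocation dispatched by the pool.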