diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..85e415762091de9b0a8d7f143db9f4edbf6bb3a7
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h
new file mode 100644
index 0000000000000000000000000000000000000000..b94c4d46dba26adccf67224f3f82de0b9387b284
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// Test these using #if AT_MKL_ENABLED(), not #ifdef, so that it's
+// obvious if you forgot to include Config.h
+// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined
+//
+// DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h
+
+#define AT_MKLDNN_ENABLED() 1
+#define AT_MKLDNN_ACL_ENABLED() 0
+#define AT_MKL_ENABLED() 1
+#define AT_MKL_SEQUENTIAL() 0
+#define AT_POCKETFFT_ENABLED() 0
+#define AT_NNPACK_ENABLED() 1
+#define CAFFE2_STATIC_LINK_CUDA() 0
+#define AT_BUILD_WITH_BLAS() 1
+#define AT_BUILD_WITH_LAPACK() 1
+#define AT_PARALLEL_OPENMP 1
+#define AT_PARALLEL_NATIVE 0
+#define AT_PARALLEL_NATIVE_TBB 0
+#define AT_BLAS_F2C() 0
+#define AT_BLAS_USE_CBLAS_DOT() 0
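A minimal sketch of the usage pattern the Config.h comment prescribes (the function name is made up for illustration). Because the macros are function-like, forgetting to include Config.h makes `#if AT_MKL_ENABLED()` a preprocessing error (the undefined identifier expands to `0`, leaving the invalid expression `0()`), whereas an `#ifdef` check would silently take the wrong branch:

#include <ATen/Config.h>

// Hypothetical helper, for illustration only.
int fft_backend_id() {
#if AT_MKL_ENABLED()
  return 1; // this build of ATen links against MKL; take the fast path
#else
  return 0; // portable fallback for builds without MKL
#endif
}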
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h
new file mode 100644
index 0000000000000000000000000000000000000000..b648ef616284f971ab51e2bf9d8e7dd557237bb7
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <c10/macros/Macros.h>
+#include <memory>
+
+namespace at::functorch {
+
+// NOTE [functorch TLS in pytorch/pytorch]
+//
+// functorch lives out-of-tree. However, it has some TLS that needs to be
+// propagated. The solution for that is we store a pointer to the TLS
+// inside pytorch/pytorch and extend FuncTorchTLSBase inside functorch to
+// include whatever functorch needs.
+//
+// We need to store a pointer due to the indirection:
+// inside functorch, we will create a subclass of FuncTorchTLSBase called
+// FuncTorchTLSImpl that actually contains metadata, like the DynamicLayerStack.
+// FuncTorchTLSBase doesn't have any metadata because it hasn't been defined
+// yet.
+//
+// Here in pytorch/pytorch, we will pass around FuncTorchTLSBase*, but inside
+// functorch, we will assign a FuncTorchTLSImpl* to the FuncTorchTLSBase*.
+// We can't directly pass around FuncTorchTLSBase (without a pointer) because
+// FuncTorchTLSImpl does not fit inside a FuncTorchTLSBase by virtue of having
+// more elements.
+struct TORCH_API FuncTorchTLSBase {
+  virtual ~FuncTorchTLSBase() = default;
+  virtual std::unique_ptr<FuncTorchTLSBase> deepcopy() const = 0;
+
+  virtual int64_t checkSupportsSingleLevelAutogradFunction() const = 0;
+  virtual void checkSupportsCppAutogradFunction() const = 0;
+  virtual void checkSupportsInplaceRequiresGrad() const = 0;
+  virtual void checkSupportsRetainGrad() const = 0;
+};
+
+// returns deepcopy of the functorch tls
+TORCH_API std::unique_ptr<FuncTorchTLSBase> getCopyOfFuncTorchTLS();
+
+// sets the functorch tls. always does a deep copy.
+TORCH_API void setFuncTorchTLS(
+    const std::shared_ptr<const FuncTorchTLSBase>& state);
+
+// get a mutable reference to the functorch tls
+TORCH_API std::unique_ptr<FuncTorchTLSBase>& functorchTLSAccessor();
+
+} // namespace at::functorch
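A hedged sketch of the out-of-tree pattern the NOTE describes; this `FuncTorchTLSImpl` is illustrative only (the real subclass lives in functorch and carries the DynamicLayerStack), but it satisfies exactly the interface declared above:

#include <ATen/FuncTorchTLS.h>
#include <cstdint>
#include <memory>
#include <vector>

// Illustrative subclass: the metadata member is a stand-in for whatever
// state functorch actually needs to propagate through TLS.
struct FuncTorchTLSImpl : at::functorch::FuncTorchTLSBase {
  std::vector<int64_t> dynamic_layer_stack;

  std::unique_ptr<FuncTorchTLSBase> deepcopy() const override {
    // FuncTorchTLSBase has no data, so copying the derived object suffices.
    return std::make_unique<FuncTorchTLSImpl>(*this);
  }
  int64_t checkSupportsSingleLevelAutogradFunction() const override { return 0; }
  void checkSupportsCppAutogradFunction() const override {}
  void checkSupportsInplaceRequiresGrad() const override {}
  void checkSupportsRetainGrad() const override {}
};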
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1231088c2b31e3fd8c40ed17ccdd41f36035367
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <c10/core/impl/LocalDispatchKeySet.h>
+
+namespace at::impl {
+
+// VmapMode contains a thread local count of how many nested vmaps
+// we are currently inside. That number is known as the `vmap level`.
+// VmapMode is used in the implementation of the Python `torch.vmap` API.
+//
+// NOTE: this is NOT the C++ API for torch.vmap. That doesn't exist yet.
+
+struct TORCH_API VmapMode {
+  // Returns the vmap level, aka the count of how many nested vmaps we're in.
+  static int64_t current_vmap_level();
+
+  // Increment the count of nested vmaps. If this causes the vmap level to be
+  // greater than 0, then it enables DispatchKey::VmapMode on all tensors.
+  static int64_t increment_nesting();
+
+  // Decrements the count of nested vmaps. If this causes the vmap level to be
+  // equal to 0, then it disables DispatchKey::VmapMode on all tensors.
+  static int64_t decrement_nesting();
+};
+
+} // namespace at::impl
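The increment/decrement pair above must stay balanced for the level count to mean anything. A hedged sketch of how a caller could guarantee that (the RAII guard is illustrative and not part of this header):

#include <ATen/LegacyVmapMode.h>

// Illustrative guard: enter a vmap level on construction, leave it on
// destruction, so nesting stays balanced even if an exception is thrown.
struct VmapLevelGuard {
  VmapLevelGuard() { at::impl::VmapMode::increment_nesting(); }
  ~VmapLevelGuard() { at::impl::VmapMode::decrement_nesting(); }
};

void run_batched_region() {
  VmapLevelGuard guard; // level goes 0 -> 1, enabling DispatchKey::VmapMode
  // ... tensor ops here observe current_vmap_level() == 1 ...
} // level drops back to 0 on scope exit, disabling DispatchKey::VmapMode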
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h
new file mode 100644
index 0000000000000000000000000000000000000000..97729b3254e7458fa76fed74469bf77ed796fe7d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h
@@ -0,0 +1,183 @@
+#pragma once
+
+#include <ATen/LegacyBatchedTensorImpl.h>
+#include <ATen/core/IListRef.h>
+
+namespace at {
+
+// This file contains abstractions used for transforming *logical* vmap
+// arguments into *physical* arguments. (Keep reading for definitions of these
+// terms).
+
+// NOTE: [Logical vs physical args]
+// Consider the following vmap.
+//   vmap(vmap(func, in_dims=(2,)), in_dims=(0,))(torch.ones(2, 3, 4))
+// This would produce a BatchedTensor wrapping a Tensor of size [2, 3, 4],
+// with batch dims 0 and 2:
+//   BatchedTensor(ones(2, 3, 4), bdims=[(lvl=1,dim=0),(lvl=2,dim=2)])
+//
+// We say the *logical* view of the tensor has size [3] -- tensors inside
+// `func` appear to have size [3].
+// However, the *physical* underlying tensor (the one passed to vmap) has size
+// [2, 3, 4].
+//
+// This notion of logical vs physical also extends to non-tensor arguments.
+// Consider the previous tensor; let's assume the user called
+// `torch.sum(tensor, dim=0)` inside of `func`. Then the logical
+// dimension they are reducing over is dim 0 but the physical dim is dim 1
+// (the first non-batch dimension).
+
+// Forward declared; see NOTE: [What is a VmapPhysicalView?]
+struct VmapPhysicalView;
+
+// Most PyTorch operators take 4 or fewer inputs.
+constexpr int64_t kVmapTransformStaticInputSize = 4;
+using VmapPhysicalViewVec =
+    SmallVector<VmapPhysicalView, kVmapTransformStaticInputSize>;
+
+// PyTorch generally advertises good performance for <= 5 dims
+// (see ATen/core/DimVector.h). We add a few extra dims (~3) for vmap
+// dimensions to get 8. Adjust this number as necessary.
+constexpr int64_t kVmapStaticDimVecSize = 8;
+using VmapDimVector = SmallVector<int64_t, kVmapStaticDimVecSize>;
+using VmapSymDimVector = SmallVector<c10::SymInt, kVmapStaticDimVecSize>;
+
+// NOTE: [What is a VmapTransform?]
+// A *VmapTransform* converts logical views of tensors to physical views.
+//
+// Batching rules use VmapTransforms to convert logical arguments to
+// physical arguments, then call one or more at:: operators that handle the
+// physical arguments, and then convert the physical result back to a logical
+// argument.
+
+// VmapTransform for operators that take tensors with multiple batch dims.
+// Given one or more logical views on Tensors, `logicalToPhysical`
+// permutes all of the batch dims to the front of the tensor, aligns
+// and expands the batch dims to match each other (according to their `level`),
+// and returns a VmapPhysicalView on the tensor(s).
+struct TORCH_API MultiBatchVmapTransform {
+  static VmapPhysicalView logicalToPhysical(const Tensor& logical_tensor);
+  static VmapPhysicalViewVec logicalToPhysical(ITensorListRef logical_tensors);
+};
+
+// VmapTransform for operators that broadcast all inputs.
+// Given some logical views on Tensors, `logicalToPhysical`:
+// - permutes all of the batch dims to the front of the tensors
+// - aligns all the batch dims to the collective levels of all of the tensors.
+//   If a tensor does not have a batch dim for a vmap level, then it receives
+//   a size-one dimension for said level.
+// - aligns the non-batch dims to have the same dimensionality, adding extra
+//   size-1 dimensions in between the batch dimensions and the non-batch
+//   dimensions so that the batch dimensions are lined up from the right.
+//
+// For example: given inputs of size (B, 2) and (B, 3, 2) where B is the batch
+// dimension, BroadcastingVmapTransform returns VmapPhysicalViews that wrap
+// tensors of size (B, 1, 2) and (B, 3, 2).
+//
+// Given inputs of size (B, 2) and (2,), BroadcastingVmapTransform returns
+// VmapPhysicalViews wrapping tensors of size (B, 2) and (1, 2). We don't
+// actually *need* to return a tensor of size (1, 2) for the second tensor
+// because the broadcasting operation takes care of that for us, but we do
+// it anyway to keep things simple.
+struct TORCH_API BroadcastingVmapTransform {
+  static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors);
+};
+
+// Forward declared, if you're reading this file head to toe, don't worry about
+// it yet.
+struct VmapPhysicalToLogicalMap;
+
+// NOTE: [What is a VmapPhysicalView?]
+// VmapPhysicalView represents a physical view on a Tensor.
+//
+// One can use it to further convert logical dimension indices, logical shapes,
+// and more to their physical variants, or convert a new (physical) tensor into
+// a logical BatchedTensor. (TODO(rzou): some of these are not yet implemented).
+//
+// VmapPhysicalView stores a physical tensor with all of its batch dimensions at
+// the front and some levels that correspond to said batch dimensions.
+//
+// The levels bitset specifies which vmap levels correspond to the batch
+// dimensions at the front of the tensor. In particular, the number of set bits
+// corresponds to the number of batch dimensions on `tensor` and the rightmost
+// bit of `levels` specifies the maximum number of nested vmaps we are in at
+// this point in time.
+// For example, given:
+//   physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5, 6), levels={1, 3})
+//
+// Rightmost bit of `levels` is 3 indicating the number of nested vmaps less
+// than or equal to 3.
+//   bitset: 010100
+//      ^
+//      |
+//   levels: 012345
+struct TORCH_API VmapPhysicalView {
+  VmapPhysicalView(Tensor&& tensor, std::bitset<kVmapNumLevels> levels)
+      : levels_(levels), tensor_(std::move(tensor)) {
+    TORCH_INTERNAL_ASSERT(!isBatchedTensor(tensor_));
+  }
+
+  Tensor& tensor() {
+    return tensor_;
+  }
+  const Tensor& tensor() const {
+    return tensor_;
+  }
+
+  // Maps logical dim indices to physical dim indices. Also does dim wrapping.
+  //
+  // For example, given:
+  //   physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5), levels={1, 3})
+  //
+  // Then physical_view.getPhysicalDims({0, 1}) returns {2, 3}.
+  // This is because the size of levels tells us that the first two dimensions
+  // of `tensor_` are batch dimensions, so a logical dim of `n` is actually
+  // a physical dim of `n + 2`.
+  VmapDimVector getPhysicalDims(OptionalIntArrayRef logical_dims) const;
+  int64_t getPhysicalDim(int64_t logical_dim) const;
+
+  // Returns a VmapPhysicalToLogicalMap object. This can be used for
+  // mapping a physical tensor to a new logical tensor (BatchedTensor)
+  VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const;
+
+  // Maps a logical shape to a physical shape by pre-pending the batch
+  // sizes to the logical shape.
+  VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const;
+
+  int64_t numBatchDims() const;
+
+ private:
+  int64_t numLogicalDims() const;
+
+  std::bitset<kVmapNumLevels> levels_;
+  Tensor tensor_;
+};
+
+// Convenience struct used for mapping a physical tensor (a non-BatchedTensor)
+// to a logical one (BatchedTensor). It holds some levels that are used to do
+// the mapping and assumes that the batch dimensions in the physical tensor all
+// occur at the front of the tensor.
+struct TORCH_API VmapPhysicalToLogicalMap {
+  VmapPhysicalToLogicalMap(std::bitset<kVmapNumLevels> levels)
+      : levels_(levels) {}
+
+  // Maps a physical tensor to a new logical tensor (BatchedTensor).
+  // Assumes that all of the "batch dimensions" are at the front
+  // of the physical tensor. For example, given:
+  // - x = rank-4 Tensor with size 2, 3, 5, 7
+  // - levels = (2, 4)
+  // Returns:
+  // - BatchedTensor(x, bdims=[(dim=0,lvl=2), (dim=1, lvl=4)])
+  Tensor apply(const Tensor& physical_tensor) const;
+
+  // Given a vector of physical tensors,
+  // 1. maps each tensor to a new logical tensor. Assumes that all of the
+  //    "batch dimensions" are at the front of the physical tensors.
+  // 2. stores the new logical tensors back into the passed-in vector. This is
+  //    to avoid additional dynamic allocations.
+  void applyInplace(std::vector<Tensor>& physical_tensors) const;
+
+  std::bitset<kVmapNumLevels> levels_;
+};
+
+} // namespace at
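To make the dim and shape bookkeeping concrete, a small sketch under the documented contract above. The helper names `physicalDimOf` and `physicalShapeOf` are made up; the real logic sits behind getPhysicalDim and getPhysicalShape:

#include <cstdint>
#include <vector>

// After MultiBatchVmapTransform moves the B batch dims to the front,
// logical dim d maps to physical dim d + B (matching getPhysicalDim).
int64_t physicalDimOf(int64_t logical_dim, int64_t num_batch_dims) {
  return logical_dim + num_batch_dims;
}

// getPhysicalShape is documented to pre-pend the batch sizes.
std::vector<int64_t> physicalShapeOf(std::vector<int64_t> batch_sizes,
                                     const std::vector<int64_t>& logical_shape) {
  batch_sizes.insert(batch_sizes.end(), logical_shape.begin(), logical_shape.end());
  return batch_sizes;
}

// For VmapPhysicalView(tensor=ones(2, 3, 4, 5), levels={1, 3}) there are two
// batch dims, so physicalDimOf(0, 2) == 2 and physicalDimOf(1, 2) == 3,
// exactly the {2, 3} that getPhysicalDims({0, 1}) returns in the example above.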
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h
new file mode 100644
index 0000000000000000000000000000000000000000..d00195b07e490208db6aa9a015bca79b0cc1c83f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <c10/core/ScalarType.h>
+#include <c10/util/BFloat16.h>
+#include <c10/util/Exception.h>
+#include <c10/util/Float8_e4m3fn.h>
+#include <c10/util/Float8_e4m3fnuz.h>
+#include <c10/util/Float8_e5m2.h>
+#include <c10/util/Float8_e5m2fnuz.h>
+#include <c10/util/Half.h>
+#include <c10/util/complex.h>
+
+namespace at {
+
+// For FP16 or BFloat16 inputs, ops should perform internal math in FP32.
+template <typename scalar_t>
+struct OpMathType {
+  using type = scalar_t;
+};
+template <>
+struct OpMathType<at::Half> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::BFloat16> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e5m2> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e4m3fn> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e5m2fnuz> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e4m3fnuz> {
+  using type = float;
+};
+template <>
+struct OpMathType<c10::complex<Half>> {
+  using type = c10::complex<float>;
+};
+
+template <typename scalar_t>
+using opmath_type = typename OpMathType<scalar_t>::type;
+
+namespace {
+
+inline c10::ScalarType toOpMathType(const c10::ScalarType type) {
+  switch (type) {
+#define DEFINE_CASE(scalar_t, TypeNum) \
+  case ScalarType::TypeNum:            \
+    return CppTypeToScalarType<at::opmath_type<scalar_t>>::value;
+
+    AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE)
+#undef DEFINE_CASE
+
+    default:
+      TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type);
+  }
+}
+
+} // namespace
+
+} // namespace at
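A short sketch of the pattern this header exists for: accumulate a reduction over reduced-precision elements in their opmath type, then cast back once at the end. `sum_in_opmath` is a made-up name for illustration:

#include <ATen/OpMathType.h>
#include <cstdint>

template <typename scalar_t>
scalar_t sum_in_opmath(const scalar_t* data, int64_t n) {
  // float when scalar_t is at::Half or at::BFloat16, per the traits above;
  // the identity mapping for types that are already wide enough.
  using opmath_t = at::opmath_type<scalar_t>;
  opmath_t acc = static_cast<opmath_t>(0);
  for (int64_t i = 0; i < n; ++i) {
    acc += static_cast<opmath_t>(data[i]); // accumulate in FP32, not FP16
  }
  return static_cast<scalar_t>(acc);
}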
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h
new file mode 100644
index 0000000000000000000000000000000000000000..5bb358e0e537796aefe5fcfa6afe72de713e0e33
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h
@@ -0,0 +1,3099 @@
+// This file contains all native_functions that can be registered to
+// and the schema string that they should be registered with
+
+Tensor _cast_Byte(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Byte(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Char(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Char(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Double(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Double(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Float(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Float(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Int(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Int(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Long(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Long(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Short(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Short(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _cast_Half(const Tensor & self, bool non_blocking); // {"schema": "aten::_cast_Half(Tensor self, bool non_blocking=False) -> Tensor", "dispatch": "False", "default": "True"}
+void _backward(const Tensor & self, TensorList inputs, const c10::optional<Tensor> & gradient, c10::optional<bool> retain_graph, bool create_graph); // {"schema": "aten::_backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()", "dispatch": "False", "default": "True"}
+void set_data(Tensor & self, const Tensor & new_data); // {"schema": "aten::set_data(Tensor(a!) self, Tensor new_data) -> ()", "dispatch": "False", "default": "True"}
+Tensor data(const Tensor & self); // {"schema": "aten::data(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+bool is_leaf(const Tensor & self); // {"schema": "aten::is_leaf(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+int64_t output_nr(const Tensor & self); // {"schema": "aten::output_nr(Tensor self) -> int", "dispatch": "False", "default": "True"}
+int64_t _version(const Tensor & self); // {"schema": "aten::_version(Tensor self) -> int", "dispatch": "False", "default": "True"}
+Tensor & requires_grad_(Tensor & self, bool requires_grad); // {"schema": "aten::requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+void retain_grad(Tensor & self); // {"schema": "aten::retain_grad(Tensor(a!) self) -> ()", "dispatch": "False", "default": "True"}
+bool retains_grad(const Tensor & self); // {"schema": "aten::retains_grad(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+Tensor _fw_primal(const Tensor & self, int64_t level); // {"schema": "aten::_fw_primal(Tensor(a) self, int level) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor _make_dual(const Tensor & primal, const Tensor & tangent, int64_t level); // {"schema": "aten::_make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor> _unpack_dual(const Tensor & dual, int64_t level); // {"schema": "aten::_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)", "dispatch": "False", "default": "True"}
+Tensor _new_zeros_with_same_feature_meta(const Tensor & self, const Tensor & other, int64_t self_num_batch_dims); // {"schema": "aten::_new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor", "dispatch": "True", "default": "True"}
+bool _has_same_storage_numel(const Tensor & self, const Tensor & other); // {"schema": "aten::_has_same_storage_numel(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "True"}
+Tensor & rename_(Tensor & self, c10::optional<DimnameList> names); // {"schema": "aten::rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor rename(const Tensor & self, c10::optional<DimnameList> names); // {"schema": "aten::rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor align_to(const Tensor & self, DimnameList names); // {"schema": "aten::align_to(Tensor(a) self, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor align_to(const Tensor & self, DimnameList order, int64_t ellipsis_idx); // {"schema": "aten::align_to.ellipsis_idx(Tensor(a) self, Dimname[] order, int ellipsis_idx) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor align_as(const Tensor & self, const Tensor & other); // {"schema": "aten::align_as(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> align_tensors(TensorList tensors); // {"schema": "aten::align_tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"}
+void _assert_async(const Tensor & self); // {"schema": "aten::_assert_async(Tensor self) -> ()", "dispatch": "True", "default": "False"}
+void _assert_async(const Tensor & self, c10::string_view assert_msg); // {"schema": "aten::_assert_async.msg(Tensor self, str assert_msg) -> ()", "dispatch": "True", "default": "False"}
+void _assert_scalar(const Scalar & self, c10::string_view assert_msg); // {"schema": "aten::_assert_scalar(Scalar self, str assert_msg) -> ()", "dispatch": "True", "default": "True"}
+Tensor _functional_assert_scalar(const Scalar & self, c10::string_view assert_msg, const Tensor & dep_token); // {"schema": "aten::_functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _functional_assert_async(const Tensor & self, c10::string_view assert_msg, const Tensor & dep_token); // {"schema": "aten::_functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "False"}
+void _assert_tensor_metadata(const Tensor & a, OptionalSymIntArrayRef size, OptionalSymIntArrayRef stride, c10::optional<ScalarType> dtype); // {"schema": "aten::_assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()", "dispatch": "False", "default": "True"}
+void _print(c10::string_view s); // {"schema": "aten::_print(str s) -> ()", "dispatch": "True", "default": "True"}
+void sym_constrain_range(const Scalar & size, c10::optional<int64_t> min, c10::optional<int64_t> max); // {"schema": "aten::sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()", "dispatch": "True", "default": "True"}
+void sym_constrain_range_for_size(const Scalar & size, c10::optional<int64_t> min, c10::optional<int64_t> max); // {"schema": "aten::sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()", "dispatch": "True", "default": "True"}
+Tensor _functional_sym_constrain_range(const Scalar & size, c10::optional<int64_t> min, c10::optional<int64_t> max, const Tensor & dep_token); // {"schema": "aten::_functional_sym_constrain_range(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _functional_sym_constrain_range_for_size(const Scalar & size, c10::optional<int64_t> min, c10::optional<int64_t> max, const Tensor & dep_token); // {"schema": "aten::_functional_sym_constrain_range_for_size(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _make_dep_token(c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::_make_dep_token(*, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor refine_names(const Tensor & self, DimnameList names); // {"schema": "aten::refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"}
+bool _use_cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank); // {"schema": "aten::_use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool", "dispatch": "True", "default": "False"}
+bool _use_cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank); // {"schema": "aten::_use_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank) -> bool", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool deterministic, bool zero_infinity); // {"schema": "aten::_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _cudnn_ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool deterministic, bool zero_infinity); // {"schema": "aten::_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+bool _use_cudnn_rnn_flatten_weight(); // {"schema": "aten::_use_cudnn_rnn_flatten_weight() -> bool", "dispatch": "False", "default": "True"}
+Tensor _cudnn_rnn_flatten_weight(TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional); // {"schema": "aten::_cudnn_rnn_flatten_weight(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,Tensor> _cudnn_rnn(const Tensor & input, TensorList weight, int64_t weight_stride0, const c10::optional<Tensor> & weight_buf, const Tensor & hx, const c10::optional<Tensor> & cx, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state); // {"schema": "aten::_cudnn_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,::std::vector<Tensor>> _cudnn_rnn_backward(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const c10::optional<Tensor> & cx, const Tensor & output, const c10::optional<Tensor> & grad_output, const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state, const Tensor & reserve, ::std::array<bool,4> output_mask); // {"schema": "aten::_cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])", "dispatch": "True", "default": "False"}
+Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::_cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "False"}
+int64_t _debug_has_internal_overlap(const Tensor & self); // {"schema": "aten::_debug_has_internal_overlap(Tensor self) -> int", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> _fused_dropout(const Tensor & self, double p, c10::optional<Generator> generator); // {"schema": "aten::_fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _masked_scale(const Tensor & self, const Tensor & mask, double scale); // {"schema": "aten::_masked_scale(Tensor self, Tensor mask, float scale) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> native_dropout(const Tensor & input, double p, c10::optional<bool> train); // {"schema": "aten::native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor native_dropout_backward(const Tensor & grad_output, const Tensor & mask, double scale); // {"schema": "aten::native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _sobol_engine_draw(const Tensor & quasi, int64_t n, const Tensor & sobolstate, int64_t dimension, int64_t num_generated, c10::optional<ScalarType> dtype); // {"schema": "aten::_sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+Tensor & _sobol_engine_ff_(Tensor & self, int64_t n, const Tensor & sobolstate, int64_t dimension, int64_t num_generated); // {"schema": "aten::_sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & _sobol_engine_scramble_(Tensor & self, const Tensor & ltm, int64_t dimension); // {"schema": "aten::_sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & _sobol_engine_initialize_state_(Tensor & self, int64_t dimension); // {"schema": "aten::_sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor _reshape_from_tensor(const Tensor & self, const Tensor & shape); // {"schema": "aten::_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _shape_as_tensor(const Tensor & self); // {"schema": "aten::_shape_as_tensor(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor dropout(const Tensor & input, double p, bool train); // {"schema": "aten::dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & dropout_(Tensor & self, double p, bool train); // {"schema": "aten::dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor feature_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::feature_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & feature_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor alpha_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::alpha_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & alpha_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor feature_alpha_dropout(const Tensor & input, double p, bool train); // {"schema": "aten::feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & feature_alpha_dropout_(Tensor & self, double p, bool train); // {"schema": "aten::feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor abs(const Tensor & self); // {"schema": "aten::abs(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & abs_(Tensor & self); // {"schema": "aten::abs_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & abs_out(const Tensor & self, Tensor & out); // {"schema": "aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor absolute(const Tensor & self); // {"schema": "aten::absolute(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & absolute_(Tensor & self); // {"schema": "aten::absolute_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & absolute_out(const Tensor & self, Tensor & out); // {"schema": "aten::absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor angle(const Tensor & self); // {"schema": "aten::angle(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & angle_out(const Tensor & self, Tensor & out); // {"schema": "aten::angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor view_as_real(const Tensor & self); // {"schema": "aten::view_as_real(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"}
+Tensor view_as_complex(const Tensor & self); // {"schema": "aten::view_as_complex(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"}
+Tensor sgn(const Tensor & self); // {"schema": "aten::sgn(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sgn_(Tensor & self); // {"schema": "aten::sgn_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sgn_out(const Tensor & self, Tensor & out); // {"schema": "aten::sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor chalf(const Tensor & self, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor real(const Tensor & self); // {"schema": "aten::real(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor imag(const Tensor & self); // {"schema": "aten::imag(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor _conj(const Tensor & self); // {"schema": "aten::_conj(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor conj(const Tensor & self); // {"schema": "aten::conj(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor _conj_physical(const Tensor & self); // {"schema": "aten::_conj_physical(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor conj_physical(const Tensor & self); // {"schema": "aten::conj_physical(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & conj_physical_out(const Tensor & self, Tensor & out); // {"schema": "aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & conj_physical_(Tensor & self); // {"schema": "aten::conj_physical_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor resolve_conj(const Tensor & self); // {"schema": "aten::resolve_conj(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor resolve_neg(const Tensor & self); // {"schema": "aten::resolve_neg(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor _neg_view(const Tensor & self); // {"schema": "aten::_neg_view(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor acos(const Tensor & self); // {"schema": "aten::acos(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & acos_(Tensor & self); // {"schema": "aten::acos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & acos_out(const Tensor & self, Tensor & out); // {"schema": "aten::acos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arccos(const Tensor & self); // {"schema": "aten::arccos(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arccos_(Tensor & self); // {"schema": "aten::arccos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arccos_out(const Tensor & self, Tensor & out); // {"schema": "aten::arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor avg_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad); // {"schema": "aten::avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor adaptive_avg_pool1d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> adaptive_max_pool1d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+Tensor add(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & add_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & add_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _add_relu(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _add_relu_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_add_relu_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & _add_relu_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _add_relu(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::_add_relu.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _add_relu_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::_add_relu_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor add(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & add_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor addmv(const Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & addmv_(Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmv_(Tensor(a!) self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & addmv_out(const Tensor & self, const Tensor & mat, const Tensor & vec, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor addr(const Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & addr_(Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & addr_out(const Tensor & self, const Tensor & vec1, const Tensor & vec2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor affine_grid_generator(const Tensor & theta, c10::SymIntArrayRef size, bool align_corners); // {"schema": "aten::affine_grid_generator(Tensor theta, SymInt[] size, bool align_corners) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor affine_grid_generator_backward(const Tensor & grad, c10::SymIntArrayRef size, bool align_corners); // {"schema": "aten::affine_grid_generator_backward(Tensor grad, SymInt[] size, bool align_corners) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _is_all_true(const Tensor & self); // {"schema": "aten::_is_all_true(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _is_any_true(const Tensor & self); // {"schema": "aten::_is_any_true(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _test_check_tensor(const Tensor & self); // {"schema": "aten::_test_check_tensor(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_functorch_fallback(const Tensor & self, const Tensor & other); // {"schema": "aten::_test_functorch_fallback(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor all(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor all(const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & all_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out); // {"schema": "aten::all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & all_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor all(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & all_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & out); // {"schema": "aten::all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+bool allclose(const Tensor & self, const Tensor & other, double rtol, double atol, bool equal_nan); // {"schema": "aten::allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool", "dispatch": "True", "default": "True"}
+Tensor any(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor any(const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & any_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out); // {"schema": "aten::any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & any_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor any(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & any_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & out); // {"schema": "aten::any.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor arange(const Scalar & end, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor arange(const Scalar & start, const Scalar & end, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor arange(const Scalar & start, const Scalar & end, const Scalar & step, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & arange_out(const Scalar & end, Tensor & out); // {"schema": "aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & arange_out(const Scalar & start, const Scalar & end, const Scalar & step, Tensor & out); // {"schema": "aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _dim_arange(const Tensor & like, int64_t dim); // {"schema": "aten::_dim_arange(Tensor like, int dim) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor argmax(const Tensor & self, c10::optional<int64_t> dim, bool keepdim); // {"schema": "aten::argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & argmax_out(const Tensor & self, c10::optional<int64_t> dim, bool keepdim, Tensor & out); // {"schema": "aten::argmax.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor argmin(const Tensor & self, c10::optional<int64_t> dim, bool keepdim); // {"schema": "aten::argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & argmin_out(const Tensor & self, c10::optional<int64_t> dim, bool keepdim, Tensor & out); // {"schema": "aten::argmin.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor acosh(const Tensor & self); // {"schema": "aten::acosh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & acosh_(Tensor & self); // {"schema": "aten::acosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & acosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::acosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arccosh(const Tensor & self); // {"schema": "aten::arccosh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arccosh_(Tensor & self); // {"schema": "aten::arccosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arccosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arccosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor asinh(const Tensor & self); // {"schema": "aten::asinh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & asinh_(Tensor & self); // {"schema": "aten::asinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & asinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arcsinh(const Tensor & self); // {"schema": "aten::arcsinh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arcsinh_(Tensor & self); // {"schema": "aten::arcsinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arcsinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor atanh(const Tensor & self); // {"schema": "aten::atanh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & atanh_(Tensor & self); // {"schema": "aten::atanh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & atanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arctanh(const Tensor & self); // {"schema": "aten::arctanh(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arctanh_(Tensor & self); // {"schema": "aten::arctanh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arctanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor as_strided(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset); // {"schema": "aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)", "dispatch": "True", "default": "False"}
+const Tensor & as_strided_(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset); // {"schema": "aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor asin(const Tensor & self); // {"schema": "aten::asin(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & asin_(Tensor & self); // {"schema": "aten::asin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & asin_out(const Tensor & self, Tensor & out); // {"schema": "aten::asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arcsin(const Tensor & self); // {"schema": "aten::arcsin(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arcsin_(Tensor & self); // {"schema": "aten::arcsin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arcsin_out(const Tensor & self, Tensor & out); // {"schema": "aten::arcsin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor atan(const Tensor & self); // {"schema": "aten::atan(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & atan_(Tensor & self); // {"schema": "aten::atan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & atan_out(const Tensor & self, Tensor & out); // {"schema": "aten::atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor arctan(const Tensor & self); // {"schema": "aten::arctan(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arctan_(Tensor & self); // {"schema": "aten::arctan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arctan_out(const Tensor & self, Tensor & out); // {"schema": "aten::arctan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor atleast_1d(const Tensor & self); // {"schema": "aten::atleast_1d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> atleast_1d(TensorList tensors); // {"schema": "aten::atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"}
+Tensor atleast_2d(const Tensor & self); // {"schema": "aten::atleast_2d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> atleast_2d(TensorList tensors); // {"schema": "aten::atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"}
+Tensor atleast_3d(const Tensor & self); // {"schema": "aten::atleast_3d(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> atleast_3d(TensorList tensors); // {"schema": "aten::atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"}
+Tensor baddbmm(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & baddbmm_(Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & baddbmm_out(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor bartlett_window(int64_t window_length, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor bartlett_window(int64_t window_length, bool periodic, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::bartlett_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor batch_norm(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, bool training, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor quantized_batch_norm(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & mean, const Tensor & var, double eps, double output_scale, int64_t output_zero_point); // {"schema": "aten::quantized_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor", "dispatch": "True", "default": "False"}
bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _batch_norm_impl_index(const Tensor & input, const c10::optional & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::_batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)", "dispatch": "False", "default": "True"} +::std::tuple _batch_norm_impl_index_backward(int64_t impl_index, const Tensor & input, const Tensor & grad_output, const c10::optional & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var_transform, bool train, double eps, ::std::array output_mask, const Tensor & reservedSpace); // {"schema": "aten::_batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor bernoulli(const Tensor & self, c10::optional generator); // {"schema": "aten::bernoulli(Tensor self, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, c10::optional generator, Tensor & out); // {"schema": "aten::bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bernoulli_(Tensor & self, const Tensor & p, c10::optional generator); // {"schema": "aten::bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bernoulli_(Tensor & self, double p, c10::optional generator); // {"schema": "aten::bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bernoulli(const Tensor & self, double p, c10::optional generator); // {"schema": "aten::bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bilinear(const Tensor & input1, const Tensor & input2, const Tensor & weight, const c10::optional & bias); // {"schema": "aten::bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor binary_cross_entropy(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & binary_cross_entropy_out(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, Tensor & out); // {"schema": "aten::binary_cross_entropy.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor binary_cross_entropy_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & binary_cross_entropy_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, Tensor & grad_input); // {"schema": "aten::binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor binary_cross_entropy_with_logits(const Tensor & self, const Tensor & target, const c10::optional & weight, const c10::optional & pos_weight, int64_t reduction); // {"schema": "aten::binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bincount(const Tensor & self, const c10::optional & weights, int64_t minlength); // {"schema": "aten::bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor bitwise_not(const Tensor & self); // {"schema": "aten::bitwise_not(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_not_(Tensor & self); // {"schema": "aten::bitwise_not_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_not_out(const Tensor & self, Tensor & out); // {"schema": "aten::bitwise_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & copysign_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor copysign(const Tensor & self, const Tensor & other); // {"schema": "aten::copysign.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copysign_(Tensor & self, const Tensor & other); // {"schema": "aten::copysign_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor copysign(const Tensor & self, const Scalar & other); // {"schema": "aten::copysign.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copysign_(Tensor & self, const Scalar & other); // {"schema": "aten::copysign_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & copysign_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::copysign.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _lazy_clone(const Tensor & self); // {"schema": "aten::_lazy_clone(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor logical_not(const Tensor & self); // {"schema": "aten::logical_not(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_not_(Tensor & self); // {"schema": "aten::logical_not_(Tensor(a!) 
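+// NOTE [ Usage sketch ] Illustrative comment, not part of the generated
+// declarations. Each declaration's JSON trailer gives the
+// native_functions.yaml "schema"; "dispatch" appears to record whether
+// backend-specific kernels are registered for the op, and "default" whether
+// a composite fallback exists for backends without their own kernel. A
+// minimal sketch of the functional / in-place / out variants above,
+// assuming only the public at:: API (values are arbitrary):
+//
+//   at::Tensor a = at::randn({3});
+//   at::Tensor mask = at::logical_not(a.to(at::kBool)); // functional variant
+//   a.copysign_(-1.0);                                  // in-place Scalar variant
+//   at::Tensor out = at::empty({3});
+//   at::copysign_out(out, a, 2.0);                      // out variant writes into `out`
+//
+Tensor & logical_not_(Tensor & self); // {"schema": "aten::logical_not_(Tensor(a!) 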
self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_not_out(const Tensor & self, Tensor & out); // {"schema": "aten::logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_xor(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_xor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_xor_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_xor_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_and(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_and(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_and_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_and_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor logical_or(const Tensor & self, const Tensor & other); // {"schema": "aten::logical_or(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & logical_or_(Tensor & self, const Tensor & other); // {"schema": "aten::logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & logical_or_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor blackman_window(int64_t window_length, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor blackman_window(int64_t window_length, bool periodic, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bmm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::bmm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bmm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector broadcast_tensors(TensorList tensors); // {"schema": "aten::broadcast_tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor broadcast_to(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _sparse_broadcast_to(const Tensor & self, IntArrayRef size); // {"schema": "aten::_sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor cat(const ITensorListRef & tensors, int64_t dim); // {"schema": "aten::cat(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cat_out(const ITensorListRef & tensors, int64_t dim, Tensor & out); // {"schema": "aten::cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cat(TensorList tensors, Dimname dim); // {"schema": "aten::cat.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cat_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::cat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concat(TensorList tensors, int64_t dim); // {"schema": "aten::concat(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concat_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::concat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concat(TensorList tensors, Dimname dim); // {"schema": "aten::concat.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concat_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::concat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concatenate(TensorList tensors, int64_t dim); // {"schema": "aten::concatenate(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concatenate_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor concatenate(TensorList tensors, Dimname dim); // {"schema": "aten::concatenate.names(Tensor[] tensors, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & concatenate_out(TensorList tensors, Dimname dim, Tensor & out); // {"schema": "aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor block_diag(TensorList tensors); // {"schema": "aten::block_diag(Tensor[] tensors) -> Tensor", "dispatch": "True", "default": "True"} +Tensor ceil(const Tensor & self); // {"schema": "aten::ceil(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ceil_(Tensor & self); // {"schema": "aten::ceil_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ceil_out(const Tensor & self, Tensor & out); // {"schema": "aten::ceil.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor chain_matmul(TensorList matrices); // {"schema": "aten::chain_matmul(Tensor[] matrices) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & chain_matmul_out(TensorList matrices, Tensor & out); // {"schema": "aten::chain_matmul.out(Tensor[] matrices, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::vector unsafe_chunk(const Tensor & self, int64_t chunks, int64_t dim); // {"schema": "aten::unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector chunk(const Tensor & self, int64_t chunks, int64_t dim); // {"schema": "aten::chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector tensor_split(const Tensor & self, c10::SymInt sections, int64_t dim); // {"schema": "aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector tensor_split(const Tensor & self, c10::SymIntArrayRef indices, int64_t dim); // {"schema": "aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector tensor_split(const Tensor & self, const Tensor & tensor_indices_or_sections, int64_t dim); // {"schema": "aten::tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +Tensor clamp(const Tensor & self, const c10::optional & min, const c10::optional & max); // {"schema": "aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor clamp(const Tensor & self, const c10::optional & min, const c10::optional & max); // {"schema": "aten::clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & clamp_(Tensor & self, const c10::optional & min, const c10::optional & max); // {"schema": "aten::clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_(Tensor & self, const c10::optional & min, const c10::optional & max); // {"schema": "aten::clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clamp_out(const Tensor & self, const c10::optional & min, const c10::optional & max, Tensor & out); // {"schema": "aten::clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & clamp_out(const Tensor & self, const c10::optional & min, const c10::optional & max, Tensor & out); // {"schema": "aten::clamp.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor clamp_max(const Tensor & self, const Scalar & max); // {"schema": "aten::clamp_max(Tensor self, Scalar max) -> Tensor", "dispatch": "True", "default": "True"} +Tensor clamp_max(const Tensor & self, const Tensor & max); // {"schema": "aten::clamp_max.Tensor(Tensor self, Tensor max) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & clamp_max_(Tensor & self, const Scalar & max); // {"schema": "aten::clamp_max_(Tensor(a!) 
self, Scalar max) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & clamp_max_(Tensor & self, const Tensor & max); // {"schema": "aten::clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & clamp_max_out(const Tensor & self, const Scalar & max, Tensor & out); // {"schema": "aten::clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & clamp_max_out(const Tensor & self, const Tensor & max, Tensor & out); // {"schema": "aten::clamp_max.Tensor_out(Tensor self, Tensor max, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor clamp_min(const Tensor & self, const Scalar & min); // {"schema": "aten::clamp_min(Tensor self, Scalar min) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor clamp_min(const Tensor & self, const Tensor & min); // {"schema": "aten::clamp_min.Tensor(Tensor self, Tensor min) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & clamp_min_(Tensor & self, const Scalar & min); // {"schema": "aten::clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & clamp_min_(Tensor & self, const Tensor & min); // {"schema": "aten::clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & clamp_min_out(const Tensor & self, const Scalar & min, Tensor & out); // {"schema": "aten::clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & clamp_min_out(const Tensor & self, const Tensor & min, Tensor & out); // {"schema": "aten::clamp_min.Tensor_out(Tensor self, Tensor min, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor clip(const Tensor & self, const c10::optional<Scalar> & min, const c10::optional<Scalar> & max); // {"schema": "aten::clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor clip(const Tensor & self, const c10::optional<Tensor> & min, const c10::optional<Tensor> & max); // {"schema": "aten::clip.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & clip_(Tensor & self, const c10::optional<Scalar> & min, const c10::optional<Scalar> & max); // {"schema": "aten::clip_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & clip_(Tensor & self, const c10::optional<Tensor> & min, const c10::optional<Tensor> & max); // {"schema": "aten::clip_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & clip_out(const Tensor & self, const c10::optional<Scalar> & min, const c10::optional<Scalar> & max, Tensor & out); // {"schema": "aten::clip.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
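+// NOTE [ Usage sketch ] Illustrative comment, not part of the generated
+// declarations. clip is the NumPy-style alias of clamp; both bounds are
+// optional, so either side may be left open. A minimal sketch assuming only
+// the public at:: API:
+//
+//   at::Tensor t = at::randn({4});
+//   at::Tensor hi = at::clamp(t, c10::nullopt, 0.5); // upper bound only
+//   t.clamp_(-1.0, 1.0);                             // in-place, both bounds
+//
+Tensor & clip_out(const Tensor & self, const c10::optional<Tensor> & min, const c10::optional<Tensor> & max, Tensor & out); // {"schema": "aten::clip.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) 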
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +bool cudnn_is_acceptable(const Tensor & self); // {"schema": "aten::cudnn_is_acceptable(Tensor self) -> bool", "dispatch": "False", "default": "True"} +Tensor complex(const Tensor & real, const Tensor & imag); // {"schema": "aten::complex(Tensor real, Tensor imag) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & complex_out(const Tensor & real, const Tensor & imag, Tensor & out); // {"schema": "aten::complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor polar(const Tensor & abs, const Tensor & angle); // {"schema": "aten::polar(Tensor abs, Tensor angle) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & polar_out(const Tensor & abs, const Tensor & angle, Tensor & out); // {"schema": "aten::polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor constant_pad_nd(const Tensor & self, c10::SymIntArrayRef pad, const Scalar & value); // {"schema": "aten::constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor contiguous(const Tensor & self, MemoryFormat memory_format); // {"schema": "aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor convolution(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); // {"schema": "aten::convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward(const Tensor & grad_output, const Tensor & input, const Tensor & weight, OptionalSymIntArrayRef bias_sizes, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor convolution_overrideable(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); // {"schema": "aten::convolution_overrideable(Tensor input, Tensor weight, Tensor? 
bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple convolution_backward_overrideable(const Tensor & grad_output, const Tensor & input, const Tensor & weight, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)", "dispatch": "True", "default": "True"} +Tensor _convolution(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32); // {"schema": "aten::_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _convolution(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, IntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled); // {"schema": "aten::_convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _convolution_mode(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _convolution_double_backward(const c10::optional & ggI, const c10::optional & ggW, const c10::optional & ggb, const Tensor & gO, const Tensor & weight, const Tensor & self, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::_convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor conv1d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv1d(Tensor input, Tensor weight, Tensor? 
bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv2d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv3d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv1d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding=\"valid\", SymInt[1] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv2d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding=\"valid\", SymInt[2] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv3d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding=\"valid\", SymInt[3] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_tbc(const Tensor & self, const Tensor & weight, const Tensor & bias, int64_t pad); // {"schema": "aten::conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple conv_tbc_backward(const Tensor & self, const Tensor & input, const Tensor & weight, const Tensor & bias, int64_t pad); // {"schema": "aten::conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor conv_transpose1d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_transpose2d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose2d.input(Tensor input, Tensor weight, Tensor? 
bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor conv_transpose3d(const Tensor & input, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymInt groups, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor copy(const Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copy_(Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _copy_from(const Tensor & self, const Tensor & dst, bool non_blocking); // {"schema": "aten::_copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _copy_from_and_resize(const Tensor & self, const Tensor & dst); // {"schema": "aten::_copy_from_and_resize(Tensor self, Tensor dst) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cos(const Tensor & self); // {"schema": "aten::cos(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cos_(Tensor & self); // {"schema": "aten::cos_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cos_out(const Tensor & self, Tensor & out); // {"schema": "aten::cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cosh(const Tensor & self); // {"schema": "aten::cosh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cosh_(Tensor & self); // {"schema": "aten::cosh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cosh_out(const Tensor & self, Tensor & out); // {"schema": "aten::cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cosine_embedding_loss(const Tensor & input1, const Tensor & input2, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor count_nonzero(const Tensor & self, IntArrayRef dim); // {"schema": "aten::count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor count_nonzero(const Tensor & self, c10::optional dim); // {"schema": "aten::count_nonzero(Tensor self, int? dim=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor cov(const Tensor & self, int64_t correction, const c10::optional & fweights, const c10::optional & aweights); // {"schema": "aten::cov(Tensor self, *, int correction=1, Tensor? fweights=None, Tensor? 
aweights=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor corrcoef(const Tensor & self); // {"schema": "aten::corrcoef(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cudnn_affine_grid_generator(const Tensor & theta, int64_t N, int64_t C, int64_t H, int64_t W); // {"schema": "aten::cudnn_affine_grid_generator(Tensor theta, int N, int C, int H, int W) -> Tensor grid", "dispatch": "True", "default": "False"} +Tensor cudnn_affine_grid_generator_backward(const Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W); // {"schema": "aten::cudnn_affine_grid_generator_backward(Tensor grad, int N, int C, int H, int W) -> Tensor grad_theta", "dispatch": "True", "default": "False"} +::std::tuple cudnn_batch_norm(const Tensor & input, const Tensor & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double exponential_average_factor, double epsilon); // {"schema": "aten::cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple cudnn_batch_norm_backward(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var, double epsilon, const Tensor & reserveSpace); // {"schema": "aten::cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32); // {"schema": "aten::cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & cudnn_convolution_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, Tensor & out); // {"schema": "aten::cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_transpose(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32); // {"schema": "aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _mps_convolution_transpose(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mps_convolution_transpose_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_relu(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cudnn_convolution_add_relu(const Tensor & self, const Tensor & weight, const Tensor & z, const c10::optional & alpha, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cudnn_grid_sampler(const Tensor & self, const Tensor & grid); // {"schema": "aten::cudnn_grid_sampler(Tensor self, Tensor grid) -> Tensor output", "dispatch": "True", "default": "False"} +::std::tuple cudnn_grid_sampler_backward(const Tensor & self, const Tensor & grid, const Tensor & grad_output); // {"schema": "aten::cudnn_grid_sampler_backward(Tensor self, Tensor grid, Tensor grad_output) -> (Tensor grad_self, Tensor grad_grid)", "dispatch": "True", "default": "False"} +::std::tuple cummax(const Tensor & self, int64_t dim); // {"schema": "aten::cummax(Tensor self, int dim) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple cummax_out(const Tensor & self, int64_t dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummax.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) 
indices)", "dispatch": "True", "default": "True"} +::std::tuple cummax(const Tensor & self, Dimname dim); // {"schema": "aten::cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple cummax_out(const Tensor & self, Dimname dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummax.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +void _cummax_helper(const Tensor & self, Tensor & values, Tensor & indices, int64_t dim); // {"schema": "aten::_cummax_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()", "dispatch": "True", "default": "False"} +::std::tuple cummin(const Tensor & self, int64_t dim); // {"schema": "aten::cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple cummin_out(const Tensor & self, int64_t dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"} +::std::tuple cummin(const Tensor & self, Dimname dim); // {"schema": "aten::cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple cummin_out(const Tensor & self, Dimname dim, Tensor & values, Tensor & indices); // {"schema": "aten::cummin.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +void _cummin_helper(const Tensor & self, Tensor & values, Tensor & indices, int64_t dim); // {"schema": "aten::_cummin_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()", "dispatch": "True", "default": "False"} +Tensor cummaxmin_backward(const Tensor & grad, const Tensor & input, const Tensor & indices, int64_t dim); // {"schema": "aten::cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumprod(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cumprod_(Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::cumprod_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cumprod_out(const Tensor & self, int64_t dim, c10::optional dtype, Tensor & out); // {"schema": "aten::cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cumprod(const Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cumprod_(Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::cumprod_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & cumprod_out(const Tensor & self, Dimname dim, c10::optional dtype, Tensor & out); // {"schema": "aten::cumprod.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor cumprod_backward(const Tensor & grad, const Tensor & input, int64_t dim, const Tensor & output); // {"schema": "aten::cumprod_backward(Tensor grad, Tensor input, int dim, Tensor output) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumsum(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & cumsum_(Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cumsum_out(const Tensor & self, int64_t dim, c10::optional dtype, Tensor & out); // {"schema": "aten::cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor cumsum(const Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & cumsum_(Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::cumsum_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & cumsum_out(const Tensor & self, Dimname dim, c10::optional dtype, Tensor & out); // {"schema": "aten::cumsum.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor cumulative_trapezoid(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::cumulative_trapezoid.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor cumulative_trapezoid(const Tensor & y, const Scalar & dx, int64_t dim); // {"schema": "aten::cumulative_trapezoid.dx(Tensor y, *, Scalar dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, int64_t reduction, bool zero_infinity); // {"schema": "aten::ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, int64_t reduction, bool zero_infinity); // {"schema": "aten::ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _ctc_loss(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _ctc_loss(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, 
bool zero_infinity=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _ctc_loss_backward(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss_backward(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _ctc_loss_backward(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity); // {"schema": "aten::_ctc_loss_backward.Tensor(Tensor grad, Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor diag_embed(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor diagflat(const Tensor & self, int64_t offset); // {"schema": "aten::diagflat(Tensor self, int offset=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor diagonal(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor linalg_diagonal(const Tensor & A, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor diagonal(const Tensor & self, Dimname outdim, Dimname dim1, Dimname dim2, int64_t offset); // {"schema": "aten::diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor diagonal_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_backward(Tensor grad_output, SymInt[] input_sizes, int offset, int dim1, int dim2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & fill_diagonal_(Tensor & self, const Scalar & fill_value, bool wrap); // {"schema": "aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor diff(const Tensor & self, int64_t n, int64_t dim, const c10::optional & prepend, const c10::optional & append); // {"schema": "aten::diff(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & diff_out(const Tensor & self, int64_t n, int64_t dim, const c10::optional & prepend, const c10::optional & append, Tensor & out); // {"schema": "aten::diff.out(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::vector gradient(const Tensor & self, const c10::optional & spacing, c10::optional dim, int64_t edge_order); // {"schema": "aten::gradient.scalarint(Tensor self, *, Scalar? 
spacing=None, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, const Scalar & spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.scalararray(Tensor self, *, Scalar spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.array(Tensor self, *, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, ArrayRef<Scalar> spacing, c10::optional<int64_t> dim, int64_t edge_order); // {"schema": "aten::gradient.scalarrayint(Tensor self, *, Scalar[] spacing, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, ArrayRef<Scalar> spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.scalarrayarray(Tensor self, *, Scalar[] spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, TensorList spacing, c10::optional<int64_t> dim, int64_t edge_order); // {"schema": "aten::gradient.tensorarrayint(Tensor self, *, Tensor[] spacing, int? dim=None, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> gradient(const Tensor & self, TensorList spacing, IntArrayRef dim, int64_t edge_order); // {"schema": "aten::gradient.tensorarray(Tensor self, *, Tensor[] spacing, int[] dim, int edge_order=1) -> Tensor[]", "dispatch": "False", "default": "True"}
+Tensor div(const Tensor & self, const Tensor & other); // {"schema": "aten::div.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & div_(Tensor & self, const Tensor & other); // {"schema": "aten::div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & div_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor div(const Tensor & self, const Tensor & other, c10::optional<c10::string_view> rounding_mode); // {"schema": "aten::div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & div_(Tensor & self, const Tensor & other, c10::optional<c10::string_view> rounding_mode); // {"schema": "aten::div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & div_out(const Tensor & self, const Tensor & other, c10::optional<c10::string_view> rounding_mode, Tensor & out); // {"schema": "aten::div.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor div(const Tensor & self, const Scalar & other); // {"schema": "aten::div.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & div_(Tensor & self, const Scalar & other); // {"schema": "aten::div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
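+// NOTE [ Usage sketch ] Illustrative comment, not part of the generated
+// declarations. The *_mode overloads of div take an optional rounding mode
+// ("trunc" or "floor"); passing c10::nullopt gives true division. A minimal
+// sketch assuming only the public at:: API:
+//
+//   at::Tensor a = at::arange(1, 5).to(at::kFloat);
+//   at::Tensor q = at::div(a, 3.0, "floor"); // floor division
+//   at::Tensor r = at::div(a, 3.0);          // true division
+//
+Tensor div(const Tensor & self, const Scalar & other, c10::optional<c10::string_view> rounding_mode); // {"schema": "aten::div.Scalar_mode(Tensor self, Scalar other, *, str? 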
rounding_mode) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & div_(Tensor & self, const Scalar & other, c10::optional rounding_mode); // {"schema": "aten::div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor divide(const Tensor & self, const Tensor & other); // {"schema": "aten::divide.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Tensor & other); // {"schema": "aten::divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Scalar & other); // {"schema": "aten::divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Scalar & other); // {"schema": "aten::divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Tensor & other, c10::optional rounding_mode); // {"schema": "aten::divide.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Tensor & other, c10::optional rounding_mode); // {"schema": "aten::divide_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & divide_out(const Tensor & self, const Tensor & other, c10::optional rounding_mode, Tensor & out); // {"schema": "aten::divide.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor divide(const Tensor & self, const Scalar & other, c10::optional rounding_mode); // {"schema": "aten::divide.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & divide_(Tensor & self, const Scalar & other, c10::optional rounding_mode); // {"schema": "aten::divide_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor true_divide(const Tensor & self, const Tensor & other); // {"schema": "aten::true_divide.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & true_divide_(Tensor & self, const Tensor & other); // {"schema": "aten::true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & true_divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor true_divide(const Tensor & self, const Scalar & other); // {"schema": "aten::true_divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & true_divide_(Tensor & self, const Scalar & other); // {"schema": "aten::true_divide_.Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor dot(const Tensor & self, const Tensor & tensor); // {"schema": "aten::dot(Tensor self, Tensor tensor) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & dot_out(const Tensor & self, const Tensor & tensor, Tensor & out); // {"schema": "aten::dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor vdot(const Tensor & self, const Tensor & other); // {"schema": "aten::vdot(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & vdot_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::vdot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor einsum(c10::string_view equation, TensorList tensors, OptionalIntArrayRef path); // {"schema": "aten::einsum(str equation, Tensor[] tensors, *, int[]? path=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor embedding(const Tensor & weight, const Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse); // {"schema": "aten::embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor embedding_backward(const Tensor & grad, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse); // {"schema": "aten::embedding_backward(Tensor grad, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor", "dispatch": "False", "default": "True"} +Tensor embedding_dense_backward(const Tensor & grad_output, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq); // {"schema": "aten::embedding_dense_backward(Tensor grad_output, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & embedding_renorm_(Tensor & self, const Tensor & indices, double max_norm, double norm_type); // {"schema": "aten::embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor embedding_sparse_backward(const Tensor & grad, const Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); // {"schema": "aten::embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _embedding_bag_forward_only(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx); // {"schema": "aten::_embedding_bag_forward_only(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? 
per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _rowwise_prune(const Tensor & weight, const Tensor & mask, ScalarType compressed_indices_dtype); // {"schema": "aten::_rowwise_prune(Tensor weight, Tensor mask, ScalarType compressed_indices_dtype) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor row_stack(TensorList tensors); // {"schema": "aten::row_stack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & row_stack_out(TensorList tensors, Tensor & out); // {"schema": "aten::row_stack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset); // {"schema": "aten::embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset, c10::optional padding_idx); // {"schema": "aten::embedding_bag.padding_idx(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, bool include_last_offset, int? padding_idx) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _embedding_bag(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx); // {"schema": "aten::_embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _embedding_bag_backward(const Tensor & grad, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _embedding_bag_sparse_backward(const Tensor & grad, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, const Tensor & bag_size, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const c10::optional & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? 
+Tensor _embedding_bag_dense_backward(const Tensor & grad, const Tensor & indices, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const c10::optional<Tensor> & per_sample_weights, int64_t padding_idx); // {"schema": "aten::_embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _embedding_bag_per_sample_weights_backward(const Tensor & grad, const Tensor & weight, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, int64_t mode, int64_t padding_idx); // {"schema": "aten::_embedding_bag_per_sample_weights_backward(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor empty(IntArrayRef size, c10::optional<DimnameList> names, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor empty(c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::empty.memory_format(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor empty_permuted(c10::SymIntArrayRef size, IntArrayRef physical_layout, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor new_empty(const Tensor & self, c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor new_empty_strided(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor new_full(const Tensor & self, c10::SymIntArrayRef size, const Scalar & fill_value, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor new_zeros(const Tensor & self, c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor new_ones(const Tensor & self, c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _empty_affine_quantized(c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, double scale, int64_t zero_point, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::_empty_affine_quantized(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _empty_per_channel_affine_quantized(c10::SymIntArrayRef size, const Tensor & scales, const Tensor & zero_points, int64_t axis, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::_empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor", "dispatch": "True", "default": "False"}
+const Tensor & resize_(const Tensor & self, c10::SymIntArrayRef size, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+const Tensor & _resize_output_(const Tensor & self, c10::SymIntArrayRef size, Device device); // {"schema": "aten::_resize_output_(Tensor(a!) self, SymInt[] size, Device device) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor empty_quantized(IntArrayRef size, const Tensor & qtensor, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & empty_out(c10::SymIntArrayRef size, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::empty.out(SymInt[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor empty_like(const Tensor & self, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"}
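+// Editor's sketch: on the public C++ surface the unpacked dtype/layout/
+// device/pin_memory optionals above are carried by one at::TensorOptions:
+//
+//   at::Tensor a = at::empty({2, 3},
+//                            at::TensorOptions().dtype(at::kFloat).device(at::kCPU));
+//   at::Tensor b = a.new_zeros({4});    // inherits dtype/device from `a`
+//   at::Tensor c = at::empty_like(a);   // same shape and options as `a`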
+Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor erf(const Tensor & self); // {"schema": "aten::erf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & erf_(Tensor & self); // {"schema": "aten::erf_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & erf_out(const Tensor & self, Tensor & out); // {"schema": "aten::erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor erfc(const Tensor & self); // {"schema": "aten::erfc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & erfc_(Tensor & self); // {"schema": "aten::erfc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & erfc_out(const Tensor & self, Tensor & out); // {"schema": "aten::erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor exp(const Tensor & self); // {"schema": "aten::exp(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & exp_(Tensor & self); // {"schema": "aten::exp_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & exp_out(const Tensor & self, Tensor & out); // {"schema": "aten::exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor exp2(const Tensor & self); // {"schema": "aten::exp2(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & exp2_(Tensor & self); // {"schema": "aten::exp2_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & exp2_out(const Tensor & self, Tensor & out); // {"schema": "aten::exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor expm1(const Tensor & self); // {"schema": "aten::expm1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & expm1_(Tensor & self); // {"schema": "aten::expm1_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & expm1_out(const Tensor & self, Tensor & out); // {"schema": "aten::expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor expand(const Tensor & self, c10::SymIntArrayRef size, bool implicit); // {"schema": "aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor expand_as(const Tensor & self, const Tensor & other); // {"schema": "aten::expand_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor eye(c10::SymInt n, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::eye(SymInt n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor eye(c10::SymInt n, c10::SymInt m, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::eye.m(SymInt n, SymInt m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & eye_out(c10::SymInt n, Tensor & out); // {"schema": "aten::eye.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & eye_out(c10::SymInt n, c10::SymInt m, Tensor & out); // {"schema": "aten::eye.m_out(SymInt n, SymInt m, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor flatten(const Tensor & self, int64_t start_dim, int64_t end_dim); // {"schema": "aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor flatten(const Tensor & self, int64_t start_dim, int64_t end_dim, Dimname out_dim); // {"schema": "aten::flatten.named_out_dim(Tensor(a) self, int start_dim, int end_dim, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor flatten(const Tensor & self, Dimname start_dim, Dimname end_dim, Dimname out_dim); // {"schema": "aten::flatten.using_names(Tensor(a) self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor flatten(const Tensor & self, DimnameList dims, Dimname out_dim); // {"schema": "aten::flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor unflatten(const Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); // {"schema": "aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor unflatten(const Tensor & self, Dimname dim, c10::SymIntArrayRef sizes, DimnameList names); // {"schema": "aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor fill(const Tensor & self, const Scalar & value); // {"schema": "aten::fill.Scalar(Tensor self, Scalar value) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor fill(const Tensor & self, const Tensor & value); // {"schema": "aten::fill.Tensor(Tensor self, Tensor value) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fill_(Tensor & self, const Scalar & value); // {"schema": "aten::fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & fill_(Tensor & self, const Tensor & value); // {"schema": "aten::fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor floor(const Tensor & self); // {"schema": "aten::floor(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & floor_(Tensor & self); // {"schema": "aten::floor_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & floor_out(const Tensor & self, Tensor & out); // {"schema": "aten::floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor floor_divide(const Tensor & self, const Tensor & other); // {"schema": "aten::floor_divide(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & floor_divide_(Tensor & self, const Tensor & other); // {"schema": "aten::floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & floor_divide_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor floor_divide(const Tensor & self, const Scalar & other); // {"schema": "aten::floor_divide.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & floor_divide_(Tensor & self, const Scalar & other); // {"schema": "aten::floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor frac(const Tensor & self); // {"schema": "aten::frac(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & frac_(Tensor & self); // {"schema": "aten::frac_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & frac_out(const Tensor & self, Tensor & out); // {"schema": "aten::frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor full(IntArrayRef size, const Scalar & fill_value, c10::optional<DimnameList> names, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor full(c10::SymIntArrayRef size, const Scalar & fill_value, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::full(SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & full_out(c10::SymIntArrayRef size, const Scalar & fill_value, Tensor & out); // {"schema": "aten::full.out(SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor full_like(const Tensor & self, const Scalar & fill_value, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor from_file(c10::string_view filename, c10::optional<bool> shared, c10::optional<int64_t> size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & gcd_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::gcd.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor gcd(const Tensor & self, const Tensor & other); // {"schema": "aten::gcd(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & gcd_(Tensor & self, const Tensor & other); // {"schema": "aten::gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & lcm_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::lcm.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor lcm(const Tensor & self, const Tensor & other); // {"schema": "aten::lcm(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & lcm_(Tensor & self, const Tensor & other); // {"schema": "aten::lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor grid_sampler(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor grid_sampler_2d(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> grid_sampler_2d_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array<bool,2> output_mask); // {"schema": "aten::grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _grid_sampler_2d_cpu_fallback(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::_grid_sampler_2d_cpu_fallback(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor> _grid_sampler_2d_cpu_fallback_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::_grid_sampler_2d_cpu_fallback_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+Tensor grid_sampler_3d(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); // {"schema": "aten::grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> grid_sampler_3d_backward(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array<bool,2> output_mask); // {"schema": "aten::grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor hann_window(int64_t window_length, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor hann_window(int64_t window_length, bool periodic, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor hamming_window(int64_t window_length, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor hamming_window(int64_t window_length, bool periodic, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hamming_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor hamming_window(int64_t window_length, bool periodic, double alpha, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hamming_window.periodic_alpha(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor hamming_window(int64_t window_length, bool periodic, double alpha, double beta, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::hamming_window.periodic_alpha_beta(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor kaiser_window(int64_t window_length, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor kaiser_window(int64_t window_length, bool periodic, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor kaiser_window(int64_t window_length, bool periodic, double beta, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
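+// Editor's sketch of the window factories above on the public C++ surface:
+//
+//   at::Tensor hann = at::hann_window(1024, /*periodic=*/true,
+//                                     at::TensorOptions().dtype(at::kFloat));
+//   at::Tensor kais = at::kaiser_window(512, /*periodic=*/true, /*beta=*/12.0);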
+Tensor hinge_embedding_loss(const Tensor & self, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor group_norm(const Tensor & input, int64_t num_groups, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, double eps, bool cudnn_enabled); // {"schema": "aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> native_group_norm(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); // {"schema": "aten::native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> native_group_norm_backward(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & rstd, const c10::optional<Tensor> & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array<bool,3> output_mask); // {"schema": "aten::native_group_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, SymInt N, SymInt C, SymInt HxW, int group, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _fft_r2c(const Tensor & self, IntArrayRef dim, int64_t normalization, bool onesided); // {"schema": "aten::_fft_r2c(Tensor self, int[] dim, int normalization, bool onesided) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _fft_r2c_out(const Tensor & self, IntArrayRef dim, int64_t normalization, bool onesided, Tensor & out); // {"schema": "aten::_fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _fft_c2r(const Tensor & self, IntArrayRef dim, int64_t normalization, c10::SymInt last_dim_size); // {"schema": "aten::_fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _fft_c2r_out(const Tensor & self, IntArrayRef dim, int64_t normalization, c10::SymInt last_dim_size, Tensor & out); // {"schema": "aten::_fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _fft_c2c(const Tensor & self, c10::SymIntArrayRef dim, int64_t normalization, bool forward); // {"schema": "aten::_fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _fft_c2c_out(const Tensor & self, c10::SymIntArrayRef dim, int64_t normalization, bool forward, Tensor & out); // {"schema": "aten::_fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+void _validate_compressed_sparse_indices(bool is_crow, const Tensor & compressed_idx, const Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); // {"schema": "aten::_validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()", "dispatch": "True", "default": "False"}
+int64_t _cufft_get_plan_cache_size(DeviceIndex device_index); // {"schema": "aten::_cufft_get_plan_cache_size(DeviceIndex device_index) -> int", "dispatch": "False", "default": "True"}
+int64_t _cufft_get_plan_cache_max_size(DeviceIndex device_index); // {"schema": "aten::_cufft_get_plan_cache_max_size(DeviceIndex device_index) -> int", "dispatch": "False", "default": "True"}
+void _cufft_set_plan_cache_max_size(DeviceIndex device_index, int64_t max_size); // {"schema": "aten::_cufft_set_plan_cache_max_size(DeviceIndex device_index, int max_size) -> ()", "dispatch": "False", "default": "True"}
+void _cufft_clear_plan_cache(DeviceIndex device_index); // {"schema": "aten::_cufft_clear_plan_cache(DeviceIndex device_index) -> ()", "dispatch": "False", "default": "True"}
+Tensor index(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices); // {"schema": "aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & index_out(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, Tensor & out); // {"schema": "aten::index.Tensor_out(Tensor self, Tensor?[] indices, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _unsafe_index(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices); // {"schema": "aten::_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & index_copy_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, Tensor & out); // {"schema": "aten::index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & index_copy_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor index_copy(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & index_copy_(Tensor & self, Dimname dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor index_copy(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & source); // {"schema": "aten::index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & index_put_(Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor index_put(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _unsafe_index_put(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate); // {"schema": "aten::_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & _index_put_impl_(Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate, bool unsafe); // {"schema": "aten::_index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor instance_norm(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); // {"schema": "aten::instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor isclose(const Tensor & self, const Tensor & other, double rtol, double atol, bool equal_nan); // {"schema": "aten::isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & isin_out(const Tensor & elements, const Tensor & test_elements, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Tensor_Tensor_out(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor isin(const Tensor & elements, const Tensor & test_elements, bool assume_unique, bool invert); // {"schema": "aten::isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & isin_out(const Tensor & elements, const Scalar & test_element, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Tensor_Scalar_out(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor isin(const Tensor & elements, const Scalar & test_element, bool assume_unique, bool invert); // {"schema": "aten::isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & isin_out(const Scalar & element, const Tensor & test_elements, bool assume_unique, bool invert, Tensor & out); // {"schema": "aten::isin.Scalar_Tensor_out(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
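+// Editor's sketch: the Tensor?[] indices above are a c10::List of optional
+// tensors, where a None entry means "take everything along that dimension":
+//
+//   at::Tensor t = at::zeros({4, 4});
+//   c10::List<c10::optional<at::Tensor>> idx;
+//   idx.push_back(at::tensor({0, 2}, at::kLong)); // rows 0 and 2
+//   idx.push_back(c10::nullopt);                  // every column
+//   at::index_put_(t, idx, at::ones({2, 4}), /*accumulate=*/false);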
+Tensor isin(const Scalar & element, const Tensor & test_elements, bool assume_unique, bool invert); // {"schema": "aten::isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor isnan(const Tensor & self); // {"schema": "aten::isnan(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+bool is_distributed(const Tensor & self); // {"schema": "aten::is_distributed(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_floating_point(const Tensor & self); // {"schema": "aten::is_floating_point(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_complex(const Tensor & self); // {"schema": "aten::is_complex(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_conj(const Tensor & self); // {"schema": "aten::is_conj(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool _is_zerotensor(const Tensor & self); // {"schema": "aten::_is_zerotensor(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_neg(const Tensor & self); // {"schema": "aten::is_neg(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+Tensor isreal(const Tensor & self); // {"schema": "aten::isreal(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+bool is_nonzero(const Tensor & self); // {"schema": "aten::is_nonzero(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_same_size(const Tensor & self, const Tensor & other); // {"schema": "aten::is_same_size(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "True"}
+bool is_signed(const Tensor & self); // {"schema": "aten::is_signed(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+bool is_inference(const Tensor & self); // {"schema": "aten::is_inference(Tensor self) -> bool", "dispatch": "False", "default": "True"}
+Tensor kl_div(const Tensor & self, const Tensor & target, int64_t reduction, bool log_target); // {"schema": "aten::kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor kron(const Tensor & self, const Tensor & other); // {"schema": "aten::kron(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & kron_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::kron.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> kthvalue(const Tensor & self, int64_t k, int64_t dim, bool keepdim); // {"schema": "aten::kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> kthvalue_out(const Tensor & self, int64_t k, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
indices)", "dispatch": "True", "default": "False"} +::std::tuple kthvalue(const Tensor & self, int64_t k, Dimname dim, bool keepdim); // {"schema": "aten::kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple kthvalue_out(const Tensor & self, int64_t k, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor layer_norm(const Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional & weight, const c10::optional & bias, double eps, bool cudnn_enable); // {"schema": "aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple native_layer_norm(const Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional & weight, const c10::optional & bias, double eps); // {"schema": "aten::native_layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"} +::std::tuple native_layer_norm_backward(const Tensor & grad_out, const Tensor & input, c10::SymIntArrayRef normalized_shape, const Tensor & mean, const Tensor & rstd, const c10::optional & weight, const c10::optional & bias, ::std::array output_mask); // {"schema": "aten::native_layer_norm_backward(Tensor grad_out, Tensor input, SymInt[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor nan_to_num(const Tensor & self, c10::optional nan, c10::optional posinf, c10::optional neginf); // {"schema": "aten::nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & nan_to_num_(Tensor & self, c10::optional nan, c10::optional posinf, c10::optional neginf); // {"schema": "aten::nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & nan_to_num_out(const Tensor & self, c10::optional nan, c10::optional posinf, c10::optional neginf, Tensor & out); // {"schema": "aten::nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor linear(const Tensor & input, const Tensor & weight, const c10::optional & bias); // {"schema": "aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple linear_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask); // {"schema": "aten::linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & linear_out(const Tensor & input, const Tensor & weight, const c10::optional & bias, Tensor & out); // {"schema": "aten::linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) 
+Tensor mkldnn_linear(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias); // {"schema": "aten::mkldnn_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor mkldnn_linear_backward_input(IntArrayRef input_size, const Tensor & grad_output, const Tensor & weight); // {"schema": "aten::mkldnn_linear_backward_input(int[] input_size, Tensor grad_output, Tensor weight) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> mkldnn_linear_backward_weights(const Tensor & grad_output, const Tensor & input, const Tensor & weight, bool bias_defined); // {"schema": "aten::mkldnn_linear_backward_weights(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> mkldnn_linear_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array<bool,3> output_mask); // {"schema": "aten::mkldnn_linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _cslt_compress(const Tensor & input); // {"schema": "aten::_cslt_compress(Tensor input) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _cslt_sparse_mm(const Tensor & compressed_A, const Tensor & dense_B, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & alpha, c10::optional<ScalarType> out_dtype, bool transpose_result, int64_t alg_id); // {"schema": "aten::_cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor", "dispatch": "True", "default": "False"}
+int64_t _cslt_sparse_mm_search(const Tensor & compressed_A, const Tensor & dense_B, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & alpha, c10::optional<ScalarType> out_dtype, bool transpose_result); // {"schema": "aten::_cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int", "dispatch": "True", "default": "False"}
+Tensor _sparse_semi_structured_linear(const Tensor & input, const Tensor & weight, const Tensor & meta, const c10::optional<Tensor> & bias, c10::optional<c10::string_view> activation, c10::optional<ScalarType> out_dtype); // {"schema": "aten::_sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _mixed_dtypes_linear(const Tensor & input, const Tensor & weight, const Tensor & scale, const c10::optional<Tensor> & bias, c10::optional<c10::string_view> activation); // {"schema": "aten::_mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor", "dispatch": "True", "default": "False"}
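+// Editor's sketch of aten::linear above (y = x @ w.t() + b):
+//
+//   at::Tensor x = at::randn({8, 16});
+//   at::Tensor w = at::randn({32, 16});
+//   at::Tensor b = at::randn({32});
+//   at::Tensor y = at::linear(x, w, b);   // shape [8, 32]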
+Tensor fbgemm_linear_int8_weight_fp32_activation(const Tensor & input, const Tensor & weight, const Tensor & packed, const Tensor & col_offsets, const Scalar & weight_scale, const Scalar & weight_zero_point, const Tensor & bias); // {"schema": "aten::fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fbgemm_linear_int8_weight(const Tensor & input, const Tensor & weight, const Tensor & packed, const Tensor & col_offsets, const Scalar & weight_scale, const Scalar & weight_zero_point, const Tensor & bias); // {"schema": "aten::fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,double,int64_t> fbgemm_linear_quantize_weight(const Tensor & input); // {"schema": "aten::fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)", "dispatch": "False", "default": "True"}
+Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor & input); // {"schema": "aten::fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fbgemm_linear_fp16_weight_fp32_activation(const Tensor & input, const Tensor & packed_weight, const Tensor & bias); // {"schema": "aten::fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fbgemm_linear_fp16_weight(const Tensor & input, const Tensor & packed_weight, const Tensor & bias); // {"schema": "aten::fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fbgemm_pack_quantized_matrix(const Tensor & input); // {"schema": "aten::fbgemm_pack_quantized_matrix(Tensor input) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fbgemm_pack_quantized_matrix(const Tensor & input, int64_t K, int64_t N); // {"schema": "aten::fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor ldexp(const Tensor & self, const Tensor & other); // {"schema": "aten::ldexp.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & ldexp_(Tensor & self, const Tensor & other); // {"schema": "aten::ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & ldexp_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linspace(const Scalar & start, const Scalar & end, int64_t steps, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor linspace(const Tensor & start, const Tensor & end, int64_t steps, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor linspace(const Tensor & start, const Scalar & end, int64_t steps, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor linspace(const Scalar & start, const Tensor & end, int64_t steps, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linspace_out(const Scalar & start, const Scalar & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & linspace_out(const Tensor & start, const Tensor & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & linspace_out(const Tensor & start, const Scalar & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & linspace_out(const Scalar & start, const Tensor & end, int64_t steps, Tensor & out); // {"schema": "aten::linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor log(const Tensor & self); // {"schema": "aten::log(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & log_(Tensor & self); // {"schema": "aten::log_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & log_out(const Tensor & self, Tensor & out); // {"schema": "aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor log10(const Tensor & self); // {"schema": "aten::log10(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & log10_(Tensor & self); // {"schema": "aten::log10_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & log10_out(const Tensor & self, Tensor & out); // {"schema": "aten::log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor log1p(const Tensor & self); // {"schema": "aten::log1p(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & log1p_(Tensor & self); // {"schema": "aten::log1p_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & log1p_out(const Tensor & self, Tensor & out); // {"schema": "aten::log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor log2(const Tensor & self); // {"schema": "aten::log2(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & log2_(Tensor & self); // {"schema": "aten::log2_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & log2_out(const Tensor & self, Tensor & out); // {"schema": "aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & logaddexp_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor logaddexp(const Tensor & self, const Tensor & other); // {"schema": "aten::logaddexp(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & logaddexp2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::logaddexp2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor logaddexp2(const Tensor & self, const Tensor & other); // {"schema": "aten::logaddexp2(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor xlogy(const Tensor & self, const Tensor & other); // {"schema": "aten::xlogy.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor xlogy(const Scalar & self, const Tensor & other); // {"schema": "aten::xlogy.Scalar_Self(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor xlogy(const Tensor & self, const Scalar & other); // {"schema": "aten::xlogy.Scalar_Other(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & xlogy_(Tensor & self, const Tensor & other); // {"schema": "aten::xlogy_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & xlogy_(Tensor & self, const Scalar & other); // {"schema": "aten::xlogy_.Scalar_Other(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & xlogy_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::xlogy.OutTensor(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & xlogy_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::xlogy.OutScalar_Self(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & xlogy_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::xlogy.OutScalar_Other(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor logspace(const Scalar & start, const Scalar & end, int64_t steps, double base, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor logspace(const Tensor & start, const Tensor & end, int64_t steps, double base, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor logspace(const Tensor & start, const Scalar & end, int64_t steps, double base, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor logspace(const Scalar & start, const Tensor & end, int64_t steps, double base, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & logspace_out(const Scalar & start, const Scalar & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & logspace_out(const Tensor & start, const Tensor & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & logspace_out(const Tensor & start, const Scalar & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & logspace_out(const Scalar & start, const Tensor & end, int64_t steps, double base, Tensor & out); // {"schema": "aten::logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor log_softmax(const Tensor & self, int64_t dim, c10::optional<ScalarType> dtype); // {"schema": "aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & log_softmax_out(const Tensor & self, int64_t dim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor log_softmax(const Tensor & self, Dimname dim, c10::optional<ScalarType> dtype); // {"schema": "aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _log_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & _log_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _log_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype); // {"schema": "aten::_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & _log_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype, Tensor & out); // {"schema": "aten::_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
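+// Editor's sketch: the public log_softmax wrapper casts to the optional dtype
+// and forwards to _log_softmax; rows of the result exponentiate-sum to one:
+//
+//   at::Tensor logits = at::randn({2, 5});
+//   at::Tensor logp = at::log_softmax(logits, /*dim=*/1);
+//   at::Tensor ones = logp.exp().sum(1);   // approximately [1.0, 1.0]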
+Tensor _logcumsumexp(const Tensor & self, int64_t dim); // {"schema": "aten::_logcumsumexp(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _logcumsumexp_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor logcumsumexp(const Tensor & self, int64_t dim); // {"schema": "aten::logcumsumexp(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & logcumsumexp_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor logcumsumexp(const Tensor & self, Dimname dim); // {"schema": "aten::logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & logcumsumexp_out(const Tensor & self, Dimname dim, Tensor & out); // {"schema": "aten::logcumsumexp.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor logsumexp(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & logsumexp_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor logsumexp(const Tensor & self, DimnameList dim, bool keepdim); // {"schema": "aten::logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & logsumexp_out(const Tensor & self, DimnameList dim, bool keepdim, Tensor & out); // {"schema": "aten::logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor margin_ranking_loss(const Tensor & input1, const Tensor & input2, const Tensor & target, double margin, int64_t reduction); // {"schema": "aten::margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor> matmul_backward(const Tensor & grad, const Tensor & self, const Tensor & other, ::std::array<bool,2> mask); // {"schema": "aten::matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor & matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor matrix_power(const Tensor & self, int64_t n); // {"schema": "aten::matrix_power(Tensor self, int n) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & matrix_power_out(const Tensor & self, int64_t n, Tensor & out); // {"schema": "aten::matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor matrix_exp(const Tensor & self); // {"schema": "aten::matrix_exp(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor matrix_exp_backward(const Tensor & self, const Tensor & grad); // {"schema": "aten::matrix_exp_backward(Tensor self, Tensor grad) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _aminmax(const Tensor & self); // {"schema": "aten::_aminmax(Tensor self) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _aminmax(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::_aminmax.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple aminmax(const Tensor & self, c10::optional dim, bool keepdim); // {"schema": "aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max)", "dispatch": "True", "default": "True"} +::std::tuple aminmax_out(const Tensor & self, c10::optional dim, bool keepdim, Tensor & min, Tensor & max); // {"schema": "aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max)", "dispatch": "True", "default": "False"} +Tensor _compute_linear_combination(const Tensor & input, const Tensor & coefficients); // {"schema": "aten::_compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _compute_linear_combination_out(const Tensor & input, const Tensor & coefficients, Tensor & out); // {"schema": "aten::_compute_linear_combination.out(Tensor input, Tensor coefficients, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple max(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"} +::std::tuple max_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & max, Tensor & max_values); // {"schema": "aten::max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"} +::std::tuple max(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple max_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & max, Tensor & max_values); // {"schema": "aten::max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor value_selecting_reduction_backward(const Tensor & grad, int64_t dim, const Tensor & indices, c10::SymIntArrayRef sizes, bool keepdim); // {"schema": "aten::value_selecting_reduction_backward(Tensor grad, int dim, Tensor indices, SymInt[] sizes, bool keepdim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor amax(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & amax_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple max_pool1d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor max_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor max_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool2d_backward(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool2d_backward(Tensor grad_output, Tensor output, Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_max_pool3d_backward(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::mkldnn_max_pool3d_backward(Tensor grad_output, Tensor output, Tensor input, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantized_max_pool1d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::quantized_max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantized_max_pool2d(const Tensor & 
+Tensor quantized_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::quantized_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor mean(const Tensor & self, c10::optional<ScalarType> dtype); // {"schema": "aten::mean(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor mean(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & mean_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::mean.out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor mean(const Tensor & self, DimnameList dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & mean_out(const Tensor & self, DimnameList dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor nanmean(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & nanmean_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor median(const Tensor & self); // {"schema": "aten::median(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> median(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> median_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::median.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> median(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> median_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::median.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"}
+Tensor nanmedian(const Tensor & self); // {"schema": "aten::nanmedian(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> nanmedian(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> nanmedian_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> nanmedian(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> nanmedian_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> min(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> min_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & min, Tensor & min_indices); // {"schema": "aten::min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> min(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> min_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & min, Tensor & min_indices); // {"schema": "aten::min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"}
+Tensor amin(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & amin_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _mps_convolution(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::_mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mps_convolution_backward(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); // {"schema": "aten::mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor mkldnn_convolution(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple mkldnn_rnn_layer(const Tensor & input, const Tensor & weight0, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & hx_, const Tensor & cx_, bool reverse, IntArrayRef batch_sizes, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train); // {"schema": "aten::mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple mkldnn_rnn_layer_backward(const Tensor & input, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & weight4, const Tensor & hx_, const Tensor & cx_tmp, const Tensor & output, const Tensor & hy_, const Tensor & cy_, const c10::optional & grad_output, const c10::optional & grad_hy, const c10::optional & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, IntArrayRef batch_sizes, bool batch_first, const Tensor & workspace); // {"schema": "aten::mkldnn_rnn_layer_backward(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple miopen_batch_norm(const Tensor & input, const Tensor & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double exponential_average_factor, double epsilon); // {"schema": "aten::miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? 
running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple miopen_batch_norm_backward(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var, double epsilon); // {"schema": "aten::miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor miopen_convolution(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_transpose(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_depthwise_convolution(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic); // {"schema": "aten::miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_relu(const Tensor & self, const Tensor & weight, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +Tensor miopen_convolution_add_relu(const Tensor & self, const Tensor & weight, const Tensor & z, const c10::optional & alpha, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? 
bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple miopen_rnn(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & hx, const c10::optional & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const c10::optional & dropout_state); // {"schema": "aten::miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple> miopen_rnn_backward(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const c10::optional & cx, const Tensor & output, const c10::optional & grad_output, const c10::optional & grad_hy, const c10::optional & grad_cy, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const c10::optional & dropout_state, const Tensor & reserve, ::std::array output_mask); // {"schema": "aten::miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])", "dispatch": "True", "default": "False"} +Tensor mm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::mm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _int_mm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::_int_mm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & _int_mm_out(const Tensor & self, const Tensor & mat2, Tensor & out); // {"schema": "aten::_int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) 
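A sketch of the mm entries (same assumptions as the earlier snippets; names are illustrative):

    #include <ATen/ATen.h>

    void mm_demo() {
      at::Tensor a = at::randn({2, 3});
      at::Tensor b = at::randn({3, 4});
      at::Tensor c = at::mm(a, b);        // shape [2, 4]
      // the .out variant writes into preallocated storage; note that the
      // public at:: wrapper takes the out tensor first, even though these
      // registration signatures list the trailing Tensor & out last
      at::Tensor out = at::empty({2, 4});
      at::mm_out(out, a, b);
    }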
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_weight_to_int4pack(const Tensor & self, int64_t innerKTiles); // {"schema": "aten::_convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _weight_int4pack_mm(const Tensor & self, const Tensor & mat2, int64_t qGroupSize, const Tensor & qScaleAndZeros); // {"schema": "aten::_weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _weight_int8pack_mm(const Tensor & self, const Tensor & mat2, const Tensor & scales); // {"schema": "aten::_weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_mm(const Tensor & sparse, const Tensor & dense); // {"schema": "aten::_sparse_mm(Tensor sparse, Tensor dense) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_mm(const Tensor & sparse, const Tensor & dense, c10::string_view reduce); // {"schema": "aten::_sparse_mm.reduce(Tensor sparse, Tensor dense, str reduce) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sparse_matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::_sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple mode(const Tensor & self, int64_t dim, bool keepdim); // {"schema": "aten::mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "False"} +::std::tuple mode_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::mode.values(Tensor self, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"} +::std::tuple mode(const Tensor & self, Dimname dim, bool keepdim); // {"schema": "aten::mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"} +::std::tuple mode_out(const Tensor & self, Dimname dim, bool keepdim, Tensor & values, Tensor & indices); // {"schema": "aten::mode.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"} +Tensor mul(const Tensor & self, const Tensor & other); // {"schema": "aten::mul.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mul_(Tensor & self, const Tensor & other); // {"schema": "aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mul(const Tensor & self, const Scalar & other); // {"schema": "aten::mul.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mul_(Tensor & self, const Scalar & other); // {"schema": "aten::mul_.Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor multiply(const Tensor & self, const Tensor & other); // {"schema": "aten::multiply.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multiply_(Tensor & self, const Tensor & other); // {"schema": "aten::multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & multiply_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::multiply.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor multiply(const Tensor & self, const Scalar & other); // {"schema": "aten::multiply.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multiply_(Tensor & self, const Scalar & other); // {"schema": "aten::multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor mv(const Tensor & self, const Tensor & vec); // {"schema": "aten::mv(Tensor self, Tensor vec) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mv_out(const Tensor & self, const Tensor & vec, Tensor & out); // {"schema": "aten::mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mvlgamma_out(const Tensor & self, int64_t p, Tensor & out); // {"schema": "aten::mvlgamma.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mvlgamma(const Tensor & self, int64_t p); // {"schema": "aten::mvlgamma(Tensor self, int p) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mvlgamma_(Tensor & self, int64_t p); // {"schema": "aten::mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor narrow_copy(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length); // {"schema": "aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & narrow_copy_out(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length, Tensor & out); // {"schema": "aten::narrow_copy.out(Tensor self, int dim, SymInt start, SymInt length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor narrow(const Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length); // {"schema": "aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor narrow(const Tensor & self, int64_t dim, const Tensor & start, c10::SymInt length); // {"schema": "aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)", "dispatch": "False", "default": "True"} +::std::tuple native_batch_norm(const Tensor & input, const c10::optional & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double momentum, double eps); // {"schema": "aten::native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? 
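The narrow/narrow_copy pair above illustrates the `Tensor(a)` view annotation in the schemas; a minimal sketch (same assumptions; names illustrative):

    #include <ATen/ATen.h>

    void narrow_demo() {
      at::Tensor t = at::arange(10, at::kFloat);
      // narrow returns a view (the Tensor(a) annotation), so writing through
      // it mutates the slice t[2:5] of the original tensor as well
      at::Tensor v = at::narrow(t, /*dim=*/0, /*start=*/2, /*length=*/3);
      v.fill_(0);
      // narrow_copy materializes an independent tensor holding the same slice
      at::Tensor c = at::narrow_copy(t, 0, 2, 3);
    }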
+::std::tuple<Tensor &,Tensor &,Tensor &> native_batch_norm_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _native_batch_norm_legit(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, Tensor & running_mean, Tensor & running_var, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _native_batch_norm_legit_no_training(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & running_mean, const Tensor & running_var, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit_no_training(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _native_batch_norm_legit_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, Tensor & running_mean, Tensor & running_var, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::_native_batch_norm_legit.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd) -> (Tensor(d!), Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _native_batch_norm_legit(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit.no_stats(Tensor input, Tensor? weight, Tensor? bias, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _native_batch_norm_legit_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, bool training, double momentum, double eps, Tensor & out, Tensor & save_mean, Tensor & save_invstd); // {"schema": "aten::_native_batch_norm_legit.no_stats_out(Tensor input, Tensor? weight, Tensor? bias, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> batch_norm_stats(const Tensor & input, double eps); // {"schema": "aten::batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor batch_norm_elemt(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & mean, const Tensor & invstd, double eps); // {"schema": "aten::batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & batch_norm_elemt_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & mean, const Tensor & invstd, double eps, Tensor & out); // {"schema": "aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> batch_norm_gather_stats(const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum, double eps, int64_t count); // {"schema": "aten::batch_norm_gather_stats(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> batch_norm_gather_stats_with_counts(const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum, double eps, const Tensor & counts); // {"schema": "aten::batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> native_batch_norm_backward(const Tensor & grad_out, const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, const c10::optional<Tensor> & save_mean, const c10::optional<Tensor> & save_invstd, bool train, double eps, ::std::array<bool,3> output_mask); // {"schema": "aten::native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> batch_norm_backward_reduce(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, bool input_g, bool weight_g, bool bias_g); // {"schema": "aten::batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor batch_norm_backward_elemt(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, const Tensor & sum_dy, const Tensor & sum_dy_xmu, const Tensor & count); // {"schema": "aten::batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> batch_norm_update_stats(const Tensor & input, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum); // {"schema": "aten::batch_norm_update_stats(Tensor input, Tensor? running_mean, Tensor? running_var, float momentum) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
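A sketch of the batch-norm entry point above (same assumptions; shapes and names are illustrative, and the optional weight/bias/stats arguments can also be passed as empty optionals):

    #include <ATen/ATen.h>

    void batch_norm_demo() {
      at::Tensor x = at::randn({8, 16, 4, 4});   // [N, C, H, W]
      at::Tensor weight = at::ones({16});
      at::Tensor bias = at::zeros({16});
      at::Tensor running_mean = at::zeros({16});
      at::Tensor running_var = at::ones({16});
      // returns (output, save_mean, save_invstd); the saved per-channel
      // statistics are what native_batch_norm_backward consumes
      auto [out, save_mean, save_invstd] = at::native_batch_norm(
          x, weight, bias, running_mean, running_var,
          /*training=*/true, /*momentum=*/0.1, /*eps=*/1e-5);
    }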
+bool is_vulkan_available(); // {"schema": "aten::is_vulkan_available() -> bool", "dispatch": "False", "default": "True"}
+bool _nnpack_available(); // {"schema": "aten::_nnpack_available() -> bool", "dispatch": "False", "default": "True"}
+Tensor _nnpack_spatial_convolution(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride); // {"schema": "aten::_nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor ones(IntArrayRef size, c10::optional<DimnameList> names, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor ones(c10::SymIntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::ones(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & ones_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::ones.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor ones_like(const Tensor & self, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor pairwise_distance(const Tensor & x1, const Tensor & x2, double p, double eps, bool keepdim); // {"schema": "aten::pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor cdist(const Tensor & x1, const Tensor & x2, double p, c10::optional<int64_t> compute_mode); // {"schema": "aten::cdist(Tensor x1, Tensor x2, float p=2, int? compute_mode=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _euclidean_dist(const Tensor & x1, const Tensor & x2); // {"schema": "aten::_euclidean_dist(Tensor x1, Tensor x2) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _cdist_forward(const Tensor & x1, const Tensor & x2, double p, c10::optional<int64_t> compute_mode); // {"schema": "aten::_cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _cdist_backward(const Tensor & grad, const Tensor & x1, const Tensor & x2, double p, const Tensor & cdist); // {"schema": "aten::_cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor pdist(const Tensor & self, double p); // {"schema": "aten::pdist(Tensor self, float p=2) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _pdist_forward(const Tensor & self, double p); // {"schema": "aten::_pdist_forward(Tensor self, float p=2) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _pdist_backward(const Tensor & grad, const Tensor & self, double p, const Tensor & pdist); // {"schema": "aten::_pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor cosine_similarity(const Tensor & x1, const Tensor & x2, int64_t dim, double eps); // {"schema": "aten::cosine_similarity(Tensor x1, Tensor x2, int dim=1, float eps=1e-08) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor permute(const Tensor & self, IntArrayRef dims); // {"schema": "aten::permute(Tensor(a) self, int[] dims) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor movedim(const Tensor & self, IntArrayRef source, IntArrayRef destination); // {"schema": "aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor movedim(const Tensor & self, int64_t source, int64_t destination); // {"schema": "aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor moveaxis(const Tensor & self, IntArrayRef source, IntArrayRef destination); // {"schema": "aten::moveaxis.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor moveaxis(const Tensor & self, int64_t source, int64_t destination); // {"schema": "aten::moveaxis.int(Tensor(a) self, int source, int destination) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor numpy_T(const Tensor & self); // {"schema": "aten::numpy_T(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor matrix_H(const Tensor & self); // {"schema": "aten::matrix_H(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor mT(const Tensor & self); // {"schema": "aten::mT(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor mH(const Tensor & self); // {"schema": "aten::mH(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor adjoint(const Tensor & self); // {"schema": "aten::adjoint(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor pixel_shuffle(const Tensor & self, int64_t upscale_factor); // {"schema": "aten::pixel_shuffle(Tensor self, int upscale_factor) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor pixel_unshuffle(const Tensor & self, int64_t downscale_factor); // {"schema": "aten::pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor channel_shuffle(const Tensor & self, c10::SymInt groups); // {"schema": "aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor native_channel_shuffle(const Tensor & self, c10::SymInt groups); // {"schema": "aten::native_channel_shuffle(Tensor self, SymInt groups) -> Tensor", "dispatch": "True", "default": "True"}
"True", "default": "True"} +bool is_pinned(const Tensor & self, c10::optional device); // {"schema": "aten::is_pinned(Tensor self, Device? device=None) -> bool", "dispatch": "True", "default": "True"} +Tensor pin_memory(const Tensor & self, c10::optional device); // {"schema": "aten::pin_memory(Tensor(a) self, Device? device=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _pin_memory(const Tensor & self, c10::optional device); // {"schema": "aten::_pin_memory(Tensor self, Device? device=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor pinverse(const Tensor & self, double rcond); // {"schema": "aten::pinverse(Tensor self, float rcond=1e-15) -> Tensor", "dispatch": "False", "default": "True"} +Tensor poisson_nll_loss(const Tensor & input, const Tensor & target, bool log_input, bool full, double eps, int64_t reduction); // {"schema": "aten::poisson_nll_loss(Tensor input, Tensor target, bool log_input, bool full, float eps, int reduction) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rad2deg(const Tensor & self); // {"schema": "aten::rad2deg(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rad2deg_(Tensor & self); // {"schema": "aten::rad2deg_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rad2deg_out(const Tensor & self, Tensor & out); // {"schema": "aten::rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor deg2rad(const Tensor & self); // {"schema": "aten::deg2rad(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & deg2rad_(Tensor & self); // {"schema": "aten::deg2rad_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & deg2rad_out(const Tensor & self, Tensor & out); // {"schema": "aten::deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor scalar_tensor(const Scalar & s, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, c10::optional names, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::rand.names(SymInt[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, c10::optional generator, c10::optional names, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::rand.generator_with_names(SymInt[] size, *, Generator? generator, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::rand(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rand(c10::SymIntArrayRef size, c10::optional generator, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::rand.generator(SymInt[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::rand.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rand_out(c10::SymIntArrayRef size, c10::optional generator, Tensor & out); // {"schema": "aten::rand.generator_out(SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor rand_like(const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format); // {"schema": "aten::rand_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt high, c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randint(SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt high, c10::SymIntArrayRef size, c10::optional generator, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randint.generator(SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randint.low(SymInt low, SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, c10::optional generator, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randint.low_generator(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt high, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randint.out(SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt high, c10::SymIntArrayRef size, c10::optional generator, Tensor & out); // {"schema": "aten::randint.generator_out(SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randint.low_out(SymInt low, SymInt high, SymInt[] size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randint_out(c10::SymInt low, c10::SymInt high, c10::SymIntArrayRef size, c10::optional generator, Tensor & out); // {"schema": "aten::randint.low_generator_out(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor randint_like(const Tensor & self, c10::SymInt high, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format); // {"schema": "aten::randint_like(Tensor self, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randint_like(const Tensor & self, c10::SymInt low, c10::SymInt high, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format); // {"schema": "aten::randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randn(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, c10::optional generator, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randn.generator(SymInt[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, c10::optional names, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randn.names(SymInt[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randn(c10::SymIntArrayRef size, c10::optional generator, c10::optional names, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randn.generator_with_names(SymInt[] size, *, Generator? generator, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::randn.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & randn_out(c10::SymIntArrayRef size, c10::optional generator, Tensor & out); // {"schema": "aten::randn.generator_out(SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor randn_like(const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format); // {"schema": "aten::randn_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? 
memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randperm(c10::SymInt n, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randperm(SymInt n, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor randperm(c10::SymInt n, c10::optional generator, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::randperm.generator(SymInt n, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & randperm_out(c10::SymInt n, Tensor & out); // {"schema": "aten::randperm.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & randperm_out(c10::SymInt n, c10::optional generator, Tensor & out); // {"schema": "aten::randperm.generator_out(SymInt n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor range(const Scalar & start, const Scalar & end, const Scalar & step, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::range.step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor range(const Scalar & start, const Scalar & end, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & range_out(const Scalar & start, const Scalar & end, Tensor & out); // {"schema": "aten::range.out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & range_out(const Scalar & start, const Scalar & end, const Scalar & step, Tensor & out); // {"schema": "aten::range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ravel(const Tensor & self); // {"schema": "aten::ravel(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor reciprocal(const Tensor & self); // {"schema": "aten::reciprocal(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reciprocal_(Tensor & self); // {"schema": "aten::reciprocal_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & reciprocal_out(const Tensor & self, Tensor & out); // {"schema": "aten::reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor neg(const Tensor & self); // {"schema": "aten::neg(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & neg_(Tensor & self); // {"schema": "aten::neg_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & neg_out(const Tensor & self, Tensor & out); // {"schema": "aten::neg.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor negative(const Tensor & self); // {"schema": "aten::negative(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & negative_(Tensor & self); // {"schema": "aten::negative_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & negative_out(const Tensor & self, Tensor & out); // {"schema": "aten::negative.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor repeat(const Tensor & self, c10::SymIntArrayRef repeats); // {"schema": "aten::repeat(Tensor self, SymInt[] repeats) -> Tensor", "dispatch": "True", "default": "True"} +Tensor repeat_interleave(const Tensor & repeats, c10::optional output_size); // {"schema": "aten::repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor repeat_interleave(const Tensor & self, const Tensor & repeats, c10::optional dim, c10::optional output_size); // {"schema": "aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor repeat_interleave(const Tensor & self, c10::SymInt repeats, c10::optional dim, c10::optional output_size); // {"schema": "aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor reshape(const Tensor & self, c10::SymIntArrayRef shape); // {"schema": "aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _reshape_copy(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::_reshape_copy(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _reshape_alias(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _mkldnn_reshape(const Tensor & self, IntArrayRef shape); // {"schema": "aten::_mkldnn_reshape(Tensor self, int[] shape) -> Tensor", "dispatch": "True", "default": "False"} +Tensor reshape_as(const Tensor & self, const Tensor & other); // {"schema": "aten::reshape_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor round(const Tensor & self); // {"schema": "aten::round(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & round_(Tensor & self); // {"schema": "aten::round_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & round_out(const Tensor & self, Tensor & out); // {"schema": "aten::round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor round(const Tensor & self, int64_t decimals); // {"schema": "aten::round.decimals(Tensor self, *, int decimals) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & round_(Tensor & self, int64_t decimals); // {"schema": "aten::round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & round_out(const Tensor & self, int64_t decimals, Tensor & out); // {"schema": "aten::round.decimals_out(Tensor self, *, int decimals, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor rrelu(const Tensor & self, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator); // {"schema": "aten::rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & rrelu_(Tensor & self, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator); // {"schema": "aten::rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor relu(const Tensor & self); // {"schema": "aten::relu(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & relu_(Tensor & self); // {"schema": "aten::relu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor relu6(const Tensor & self); // {"schema": "aten::relu6(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & relu6_(Tensor & self); // {"schema": "aten::relu6_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor prelu(const Tensor & self, const Tensor & weight); // {"schema": "aten::prelu(Tensor self, Tensor weight) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _prelu_kernel(const Tensor & self, const Tensor & weight); // {"schema": "aten::_prelu_kernel(Tensor self, Tensor weight) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _prelu_kernel_backward(const Tensor & grad_output, const Tensor & self, const Tensor & weight); // {"schema": "aten::_prelu_kernel_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & gelu_out(const Tensor & self, c10::string_view approximate, Tensor & out); // {"schema": "aten::gelu.out(Tensor self, *, str approximate='none', Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & gelu_(Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu_(Tensor(a!) self, *, str approximate='none') -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor gelu(const Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu(Tensor self, *, str approximate='none') -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gelu_backward_out(const Tensor & grad_output, const Tensor & self, c10::string_view approximate, Tensor & grad_input); // {"schema": "aten::gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gelu_backward(const Tensor & grad_output, const Tensor & self, c10::string_view approximate); // {"schema": "aten::gelu_backward(Tensor grad_output, Tensor self, *, str approximate='none') -> Tensor", "dispatch": "True", "default": "True"} +Tensor infinitely_differentiable_gelu_backward(const Tensor & grad, const Tensor & self); // {"schema": "aten::infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & hardshrink_out(const Tensor & self, const Scalar & lambd, Tensor & out); // {"schema": "aten::hardshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardshrink(const Tensor & self, const Scalar & lambd); // {"schema": "aten::hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardshrink_backward_out(const Tensor & grad_out, const Tensor & self, const Scalar & lambd, Tensor & grad_input); // {"schema": "aten::hardshrink_backward.grad_input(Tensor grad_out, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardshrink_backward(const Tensor & grad_out, const Tensor & self, const Scalar & lambd); // {"schema": "aten::hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor", "dispatch": "True", "default": "True"} +Tensor rsqrt(const Tensor & self); // {"schema": "aten::rsqrt(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rsqrt_(Tensor & self); // {"schema": "aten::rsqrt_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsqrt_out(const Tensor & self, Tensor & out); // {"schema": "aten::rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor select(const Tensor & self, Dimname dim, int64_t index); // {"schema": "aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor select(const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor select_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index); // {"schema": "aten::select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_select_backward(const Tensor & grad_output, const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::_nested_select_backward(Tensor grad_output, Tensor self, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor selu(const Tensor & self); // {"schema": "aten::selu(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & selu_(Tensor & self); // {"schema": "aten::selu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor celu(const Tensor & self, const Scalar & alpha); // {"schema": "aten::celu(Tensor self, Scalar alpha=1.0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & celu_(Tensor & self, const Scalar & alpha); // {"schema": "aten::celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor silu(const Tensor & self); // {"schema": "aten::silu(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & silu_(Tensor & self); // {"schema": "aten::silu_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & silu_out(const Tensor & self, Tensor & out); // {"schema": "aten::silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & silu_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) 
+Tensor & silu_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor silu_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::silu_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor mish(const Tensor & self); // {"schema": "aten::mish(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & mish_(Tensor & self); // {"schema": "aten::mish_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mish_out(const Tensor & self, Tensor & out); // {"schema": "aten::mish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor mish_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::mish_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor sigmoid(const Tensor & self); // {"schema": "aten::sigmoid(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sigmoid_(Tensor & self); // {"schema": "aten::sigmoid_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor logit(const Tensor & self, c10::optional<double> eps); // {"schema": "aten::logit(Tensor self, float? eps=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & logit_(Tensor & self, c10::optional<double> eps); // {"schema": "aten::logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & logit_out(const Tensor & self, c10::optional<double> eps, Tensor & out); // {"schema": "aten::logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sin(const Tensor & self); // {"schema": "aten::sin(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sin_(Tensor & self); // {"schema": "aten::sin_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sin_out(const Tensor & self, Tensor & out); // {"schema": "aten::sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sinc(const Tensor & self); // {"schema": "aten::sinc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sinc_(Tensor & self); // {"schema": "aten::sinc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sinc_out(const Tensor & self, Tensor & out); // {"schema": "aten::sinc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sinh(const Tensor & self); // {"schema": "aten::sinh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sinh_(Tensor & self); // {"schema": "aten::sinh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sinh_out(const Tensor & self, Tensor & out); // {"schema": "aten::sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor detach(const Tensor & self); // {"schema": "aten::detach(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +int64_t size(const Tensor & self, int64_t dim); // {"schema": "aten::size.int(Tensor self, int dim) -> int", "dispatch": "False", "default": "True"} +int64_t size(const Tensor & self, Dimname dim); // {"schema": "aten::size.Dimname(Tensor self, Dimname dim) -> int", "dispatch": "False", "default": "True"} +c10::SymInt sym_size(const Tensor & self, int64_t dim); // {"schema": "aten::sym_size.int(Tensor self, int dim) -> SymInt", "dispatch": "False", "default": "True"} +c10::SymInt sym_numel(const Tensor & self); // {"schema": "aten::sym_numel(Tensor self) -> SymInt", "dispatch": "False", "default": "True"} +c10::SymInt sym_storage_offset(const Tensor & self); // {"schema": "aten::sym_storage_offset(Tensor self) -> SymInt", "dispatch": "False", "default": "True"} +Tensor slice(const Tensor & self, int64_t dim, c10::optional start, c10::optional end, c10::SymInt step); // {"schema": "aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor slice_backward(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step); // {"schema": "aten::slice_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step) -> Tensor", "dispatch": "True", "default": "True"} +Tensor slice_inverse(const Tensor & self, const Tensor & src, int64_t dim, c10::optional start, c10::optional end, c10::SymInt step); // {"schema": "aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor slice_scatter(const Tensor & self, const Tensor & src, int64_t dim, c10::optional start, c10::optional end, c10::SymInt step); // {"schema": "aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor select_scatter(const Tensor & self, const Tensor & src, int64_t dim, c10::SymInt index); // {"schema": "aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"} +Tensor diagonal_scatter(const Tensor & self, const Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor as_strided_scatter(const Tensor & self, const Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional storage_offset); // {"schema": "aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor smm(const Tensor & self, const Tensor & mat2); // {"schema": "aten::smm(Tensor self, Tensor mat2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor softmax(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & softmax_out(const Tensor & self, int64_t dim, c10::optional dtype, Tensor & out); // {"schema": "aten::softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor softmax(const Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype); // {"schema": "aten::_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype, Tensor & grad_input); // {"schema": "aten::_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector unsafe_split(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector split(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector split(const Tensor & self, c10::SymIntArrayRef split_size, int64_t dim); // {"schema": "aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector unsafe_split_with_sizes(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"} +::std::vector split_with_sizes(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector hsplit(const Tensor & self, int64_t sections); // {"schema": "aten::hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector hsplit(const Tensor & self, IntArrayRef indices); // {"schema": "aten::hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector vsplit(const Tensor & self, int64_t sections); // {"schema": "aten::vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector vsplit(const Tensor & self, IntArrayRef indices); // {"schema": "aten::vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector dsplit(const Tensor & self, int64_t sections); // {"schema": "aten::dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +::std::vector dsplit(const Tensor & self, IntArrayRef indices); // {"schema": 
"aten::dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +Tensor squeeze(const Tensor & self); // {"schema": "aten::squeeze(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor squeeze(const Tensor & self, int64_t dim); // {"schema": "aten::squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor squeeze(const Tensor & self, Dimname dim); // {"schema": "aten::squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor squeeze(const Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze.dims(Tensor(a) self, int[] dim) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & squeeze_(Tensor & self); // {"schema": "aten::squeeze_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_(Tensor & self, int64_t dim); // {"schema": "aten::squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_(Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze_.dims(Tensor(a!) self, int[] dim) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_(Tensor & self, Dimname dim); // {"schema": "aten::squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor sspaddmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & sspaddmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::sspaddmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _chunk_cat(TensorList tensors, int64_t dim, int64_t num_chunks); // {"schema": "aten::_chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _chunk_cat_out(TensorList tensors, int64_t dim, int64_t num_chunks, Tensor & out); // {"schema": "aten::_chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor stack(TensorList tensors, int64_t dim); // {"schema": "aten::stack(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & stack_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _stack(TensorList tensors, int64_t dim); // {"schema": "aten::_stack(Tensor[] tensors, int dim=0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _stack_out(TensorList tensors, int64_t dim, Tensor & out); // {"schema": "aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor hstack(TensorList tensors); // {"schema": "aten::hstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & hstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::hstack.out(Tensor[] tensors, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor vstack(TensorList tensors); // {"schema": "aten::vstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & vstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::vstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor dstack(TensorList tensors); // {"schema": "aten::dstack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & dstack_out(TensorList tensors, Tensor & out); // {"schema": "aten::dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor stft(const Tensor & self, int64_t n_fft, c10::optional hop_length, c10::optional win_length, const c10::optional & window, bool normalized, c10::optional onesided, c10::optional return_complex); // {"schema": "aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor stft(const Tensor & self, int64_t n_fft, c10::optional hop_length, c10::optional win_length, const c10::optional & window, bool center, c10::string_view pad_mode, bool normalized, c10::optional onesided, c10::optional return_complex); // {"schema": "aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode=\"reflect\", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor istft(const Tensor & self, int64_t n_fft, c10::optional hop_length, c10::optional win_length, const c10::optional & window, bool center, bool normalized, c10::optional onesided, c10::optional length, bool return_complex); // {"schema": "aten::istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor", "dispatch": "False", "default": "True"} +int64_t stride(const Tensor & self, int64_t dim); // {"schema": "aten::stride.int(Tensor self, int dim) -> int", "dispatch": "False", "default": "True"} +int64_t stride(const Tensor & self, Dimname dim); // {"schema": "aten::stride.Dimname(Tensor self, Dimname dim) -> int", "dispatch": "False", "default": "True"} +c10::SymInt sym_stride(const Tensor & self, int64_t dim); // {"schema": "aten::sym_stride.int(Tensor self, int dim) -> SymInt", "dispatch": "False", "default": "True"} +Tensor sum(const Tensor & self, c10::optional dtype); // {"schema": "aten::sum(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sum(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional dtype); // {"schema": "aten::sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sum(const Tensor & self, DimnameList dim, bool keepdim, c10::optional dtype); // {"schema": "aten::sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & sum_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional dtype, Tensor & out); // {"schema": "aten::sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? 
+Tensor & sum_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & sum_out(const Tensor & self, DimnameList dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor _nested_sum_backward(const Tensor & grad, const Tensor & self, OptionalIntArrayRef dim, bool keepdim); // {"schema": "aten::_nested_sum_backward(Tensor grad, Tensor self, int[1]? dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor nansum(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::nansum(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & nansum_out(const Tensor & self, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::nansum.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sum_to_size(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor sqrt(const Tensor & self); // {"schema": "aten::sqrt(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sqrt_(Tensor & self); // {"schema": "aten::sqrt_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sqrt_out(const Tensor & self, Tensor & out); // {"schema": "aten::sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor square(const Tensor & self); // {"schema": "aten::square(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & square_(Tensor & self); // {"schema": "aten::square_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & square_out(const Tensor & self, Tensor & out); // {"schema": "aten::square.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor std(const Tensor & self, bool unbiased); // {"schema": "aten::std(Tensor self, bool unbiased=True) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor std(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::std.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor std(const Tensor & self, OptionalIntArrayRef dim, const c10::optional<Scalar> & correction, bool keepdim); // {"schema": "aten::std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> std_mean(const Tensor & self, bool unbiased); // {"schema": "aten::std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> std_mean(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::std_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
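+// [editor's sketch, not part of the generated header] Hedged example for the
+// sum/nansum reductions above (assumes <ATen/ATen.h>); dim is optional, so
+// the dim-less call reduces over every element.
+//   at::Tensor x     = at::randn({2, 5});
+//   at::Tensor total = at::sum(x);                   // 0-dim scalar tensor
+//   at::Tensor rows  = x.sum(1, /*keepdim=*/true);   // shape [2, 1]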
+::std::tuple<Tensor,Tensor> std_mean(const Tensor & self, OptionalIntArrayRef dim, const c10::optional<Scalar> & correction, bool keepdim); // {"schema": "aten::std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> std_mean(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> std_mean(const Tensor & self, DimnameList dim, const c10::optional<Scalar> & correction, bool keepdim); // {"schema": "aten::std_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+Tensor & std_out(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::std.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & std_out(const Tensor & self, OptionalIntArrayRef dim, const c10::optional<Scalar> & correction, bool keepdim, Tensor & out); // {"schema": "aten::std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor std(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & std_out(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor std(const Tensor & self, DimnameList dim, const c10::optional<Scalar> & correction, bool keepdim); // {"schema": "aten::std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & std_out(const Tensor & self, DimnameList dim, const c10::optional<Scalar> & correction, bool keepdim, Tensor & out); // {"schema": "aten::std.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor prod(const Tensor & self, c10::optional<ScalarType> dtype); // {"schema": "aten::prod(Tensor self, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor prod(const Tensor & self, int64_t dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & prod_out(const Tensor & self, int64_t dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor prod(const Tensor & self, Dimname dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
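+// [editor's sketch, not part of the generated header] Hedged example for the
+// std/std_mean overloads above; the correction variants generalize Bessel's
+// N-1 denominator (assumes <ATen/ATen.h> and C++17 structured bindings).
+//   at::Tensor x = at::randn({8});
+//   auto [s, m] = at::std_mean(x, /*unbiased=*/true);  // classic N-1 form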
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor t(const Tensor & self); // {"schema": "aten::t(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & t_(Tensor & self); // {"schema": "aten::t_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor tan(const Tensor & self); // {"schema": "aten::tan(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tan_(Tensor & self); // {"schema": "aten::tan_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & tan_out(const Tensor & self, Tensor & out); // {"schema": "aten::tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tanh(const Tensor & self); // {"schema": "aten::tanh(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & tanh_(Tensor & self); // {"schema": "aten::tanh_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & tanh_out(const Tensor & self, Tensor & out); // {"schema": "aten::tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor tensordot(const Tensor & self, const Tensor & other, IntArrayRef dims_self, IntArrayRef dims_other); // {"schema": "aten::tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & tensordot_out(const Tensor & self, const Tensor & other, IntArrayRef dims_self, IntArrayRef dims_other, Tensor & out); // {"schema": "aten::tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor threshold(const Tensor & self, const Scalar & threshold, const Scalar & value); // {"schema": "aten::threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & threshold_(Tensor & self, const Scalar & threshold, const Scalar & value); // {"schema": "aten::threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & threshold_out(const Tensor & self, const Scalar & threshold, const Scalar & value, Tensor & out); // {"schema": "aten::threshold.out(Tensor self, Scalar threshold, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & threshold_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & threshold, Tensor & grad_input); // {"schema": "aten::threshold_backward.grad_input(Tensor grad_output, Tensor self, Scalar threshold, *, Tensor(a!) 
+Tensor & threshold_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & threshold, Tensor & grad_input); // {"schema": "aten::threshold_backward.grad_input(Tensor grad_output, Tensor self, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor threshold_backward(const Tensor & grad_output, const Tensor & self, const Scalar & threshold); // {"schema": "aten::threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor tile(const Tensor & self, c10::SymIntArrayRef dims); // {"schema": "aten::tile(Tensor self, SymInt[] dims) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor transpose(const Tensor & self, Dimname dim0, Dimname dim1); // {"schema": "aten::transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor _mkldnn_transpose(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::_mkldnn_transpose(Tensor self, int dim0, int dim1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & transpose_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _mkldnn_transpose_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::_mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor one_hot(const Tensor & self, int64_t num_classes); // {"schema": "aten::one_hot(Tensor self, int num_classes=-1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor flip(const Tensor & self, IntArrayRef dims); // {"schema": "aten::flip(Tensor self, int[] dims) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor fliplr(const Tensor & self); // {"schema": "aten::fliplr(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor flipud(const Tensor & self); // {"schema": "aten::flipud(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor roll(const Tensor & self, c10::SymIntArrayRef shifts, IntArrayRef dims); // {"schema": "aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor rot90(const Tensor & self, int64_t k, IntArrayRef dims); // {"schema": "aten::rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor trapezoid(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::trapezoid.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor trapezoid(const Tensor & y, const Scalar & dx, int64_t dim); // {"schema": "aten::trapezoid.dx(Tensor y, *, Scalar dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor trapz(const Tensor & y, const Tensor & x, int64_t dim); // {"schema": "aten::trapz.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor trapz(const Tensor & y, double dx, int64_t dim); // {"schema": "aten::trapz.dx(Tensor y, *, float dx=1, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> _transform_bias_rescale_qkv(const Tensor & qkv, const Tensor & qkv_bias, int64_t num_heads); // {"schema": "aten::_transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
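+// [editor's sketch, not part of the generated header] Hedged example for the
+// transpose/flip entries above (assumes <ATen/ATen.h>); transpose yields a
+// view, flip copies.
+//   at::Tensor x  = at::arange(6).reshape({2, 3});
+//   at::Tensor xt = at::transpose(x, 0, 1);   // view with shape [3, 2]
+//   at::Tensor xf = at::flip(x, {0});         // rows reversed, new tensor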
{"schema": "aten::_nested_tensor_from_mask(Tensor t, Tensor mask, bool mask_check=True) -> Tensor", "dispatch": "True", "default": "False"} +bool _nested_tensor_from_mask_left_aligned(const Tensor & t, const Tensor & mask); // {"schema": "aten::_nested_tensor_from_mask_left_aligned(Tensor t, Tensor mask) -> bool", "dispatch": "True", "default": "False"} +Tensor _nested_from_padded(const Tensor & padded, const Tensor & cpu_nested_shape_example, bool fuse_transform_0213); // {"schema": "aten::_nested_from_padded(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_size(const Tensor & self); // {"schema": "aten::_nested_tensor_size(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_strides(const Tensor & self); // {"schema": "aten::_nested_tensor_strides(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_tensor_storage_offsets(const Tensor & self); // {"schema": "aten::_nested_tensor_storage_offsets(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_from_padded_and_nested_example(const Tensor & padded, const Tensor & nt_example); // {"schema": "aten::_nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _nested_view_from_buffer(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets); // {"schema": "aten::_nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _nested_view_from_buffer_copy(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets); // {"schema": "aten::_nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _nested_view_from_jagged(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const c10::optional & lengths, int64_t ragged_idx); // {"schema": "aten::_nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _nested_view_from_jagged_copy(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const c10::optional & lengths, int64_t ragged_idx); // {"schema": "aten::_nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? 
+Tensor _nested_view_from_jagged_copy(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const c10::optional<Tensor> & lengths, int64_t ragged_idx); // {"schema": "aten::_nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _nested_get_values(const Tensor & self); // {"schema": "aten::_nested_get_values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"}
+Tensor _nested_get_values_copy(const Tensor & self); // {"schema": "aten::_nested_get_values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _nested_get_offsets(const Tensor & self); // {"schema": "aten::_nested_get_offsets(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _nested_get_lengths(const Tensor & self); // {"schema": "aten::_nested_get_lengths(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+int64_t _nested_get_ragged_idx(const Tensor & self); // {"schema": "aten::_nested_get_ragged_idx(Tensor self) -> int", "dispatch": "True", "default": "False"}
+Tensor _nested_get_jagged_dummy(const Tensor & any); // {"schema": "aten::_nested_get_jagged_dummy(Tensor any) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _trilinear(const Tensor & i1, const Tensor & i2, const Tensor & i3, IntArrayRef expand1, IntArrayRef expand2, IntArrayRef expand3, IntArrayRef sumdim, int64_t unroll_dim); // {"schema": "aten::_trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor triplet_margin_loss(const Tensor & anchor, const Tensor & positive, const Tensor & negative, double margin, double p, double eps, bool swap, int64_t reduction); // {"schema": "aten::triplet_margin_loss(Tensor anchor, Tensor positive, Tensor negative, float margin=1.0, float p=2, float eps=1e-06, bool swap=False, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor trunc(const Tensor & self); // {"schema": "aten::trunc(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & trunc_(Tensor & self); // {"schema": "aten::trunc_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & trunc_out(const Tensor & self, Tensor & out); // {"schema": "aten::trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor fix(const Tensor & self); // {"schema": "aten::fix(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fix_(Tensor & self); // {"schema": "aten::fix_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "True"}
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor type_as(const Tensor & self, const Tensor & other); // {"schema": "aten::type_as(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +bool _has_compatible_shallow_copy_type(const Tensor & self, const Tensor & from); // {"schema": "aten::_has_compatible_shallow_copy_type(Tensor self, Tensor from) -> bool", "dispatch": "False", "default": "True"} +::std::tuple _unique(const Tensor & self, bool sorted, bool return_inverse); // {"schema": "aten::_unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple unique_dim(const Tensor & self, int64_t dim, bool sorted, bool return_inverse, bool return_counts); // {"schema": "aten::unique_dim(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple unique_consecutive(const Tensor & self, bool return_inverse, bool return_counts, c10::optional dim); // {"schema": "aten::unique_consecutive(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple unique_dim_consecutive(const Tensor & self, int64_t dim, bool return_inverse, bool return_counts); // {"schema": "aten::unique_dim_consecutive(Tensor self, int dim, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _unique2(const Tensor & self, bool sorted, bool return_inverse, bool return_counts); // {"schema": "aten::_unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor _unsafe_view(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"} +Tensor unsqueeze(const Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & unsqueeze_(Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor vander(const Tensor & x, c10::optional N, bool increasing); // {"schema": "aten::vander(Tensor x, int? N=None, bool increasing=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, bool unbiased); // {"schema": "aten::var(Tensor self, bool unbiased=True) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::var.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, OptionalIntArrayRef dim, const c10::optional & correction, bool keepdim); // {"schema": "aten::var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & var_out(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, OptionalIntArrayRef dim, const c10::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::var.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor var(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim, Tensor & out); // {"schema": "aten::var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor var(const Tensor & self, DimnameList dim, const c10::optional & correction, bool keepdim); // {"schema": "aten::var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & var_out(const Tensor & self, DimnameList dim, const c10::optional & correction, bool keepdim, Tensor & out); // {"schema": "aten::var.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, bool unbiased); // {"schema": "aten::var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, OptionalIntArrayRef dim, bool unbiased, bool keepdim); // {"schema": "aten::var_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, OptionalIntArrayRef dim, const c10::optional & correction, bool keepdim); // {"schema": "aten::var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple var_mean(const Tensor & self, DimnameList dim, bool unbiased, bool keepdim); // {"schema": "aten::var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple var_mean(const Tensor & self, DimnameList dim, const c10::optional & correction, bool keepdim); // {"schema": "aten::var_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor view_as(const Tensor & self, const Tensor & other); // {"schema": "aten::view_as(Tensor(a) self, Tensor other) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Tensor & self, const Tensor & other); // {"schema": "aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & where_out(const Tensor & condition, const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor where(const Tensor & condition, const Scalar & self, const Tensor & other); // {"schema": "aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Tensor & self, const Scalar & other); // {"schema": "aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor where(const Tensor & condition, const Scalar & self, const Scalar & other); // {"schema": "aten::where.Scalar(Tensor condition, Scalar self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +::std::vector where(const Tensor & condition); // {"schema": "aten::where(Tensor condition) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor norm_except_dim(const Tensor & v, int64_t pow, int64_t dim); // {"schema": "aten::norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _weight_norm(const Tensor & v, const Tensor & g, int64_t dim); // {"schema": "aten::_weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _weight_norm_interface(const Tensor & v, const Tensor & g, int64_t dim); // {"schema": "aten::_weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _weight_norm_interface_backward(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim); // {"schema": "aten::_weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _weight_norm_differentiable_backward(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim); // {"schema": "aten::_weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor zeros(IntArrayRef size, c10::optional names, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _efficientzerotensor(c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor zeros(c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & zeros_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::zeros.out(SymInt[] size, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor zeros_like(const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format); // {"schema": "aten::zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _standard_gamma_grad(const Tensor & self, const Tensor & output); // {"schema": "aten::_standard_gamma_grad(Tensor self, Tensor output) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _standard_gamma(const Tensor & self, c10::optional generator); // {"schema": "aten::_standard_gamma(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _dirichlet_grad(const Tensor & x, const Tensor & alpha, const Tensor & total); // {"schema": "aten::_dirichlet_grad(Tensor x, Tensor alpha, Tensor total) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sample_dirichlet(const Tensor & self, c10::optional generator); // {"schema": "aten::_sample_dirichlet(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor poisson(const Tensor & self, c10::optional generator); // {"schema": "aten::poisson(Tensor self, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor binomial(const Tensor & count, const Tensor & prob, c10::optional generator); // {"schema": "aten::binomial(Tensor count, Tensor prob, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor native_norm(const Tensor & self, const Scalar & p); // {"schema": "aten::native_norm(Tensor self, Scalar p=2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor native_norm(const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, c10::optional dtype); // {"schema": "aten::native_norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_sum(const Tensor & self); // {"schema": "aten::_sparse_sum(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum(const Tensor & self, ScalarType dtype); // {"schema": "aten::_sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum(const Tensor & self, IntArrayRef dim); // {"schema": "aten::_sparse_sum.dim(Tensor self, int[1] dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _sparse_sum(const Tensor & self, IntArrayRef dim, ScalarType dtype); // {"schema": "aten::_sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_sum_backward(const Tensor & grad, const Tensor & self, IntArrayRef dim); // {"schema": "aten::_sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_csr_sum(const Tensor & self, IntArrayRef dim, bool keepdim, c10::optional dtype); // {"schema": "aten::_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_csr_prod(const Tensor & self, IntArrayRef dim, bool keepdim, c10::optional dtype); // {"schema": "aten::_sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_softmax(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::_sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_softmax(const Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::_sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self); // {"schema": "aten::_sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_log_softmax(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::_sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_log_softmax(const Tensor & self, Dimname dim, c10::optional dtype); // {"schema": "aten::_sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_log_softmax(const Tensor & self, int64_t dim, bool half_to_float); // {"schema": "aten::_sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_log_softmax_backward_data(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self); // {"schema": "aten::_sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _spdiags(const Tensor & diagonals, const Tensor & offsets, IntArrayRef shape, c10::optional layout); // {"schema": "aten::_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor norm(const Tensor & self, const c10::optional & p, ScalarType dtype); // {"schema": "aten::norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const Scalar & p); // {"schema": "aten::norm.Scalar(Tensor self, Scalar p=2) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, ScalarType dtype); // {"schema": "aten::norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"} +Tensor norm(const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim); // {"schema": "aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & norm_out(const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor norm(const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim, ScalarType dtype); // {"schema": "aten::norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor", "dispatch": "False", "default": "True"} +Tensor norm(const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim); // {"schema": "aten::norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & norm_out(const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.names_dtype_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & norm_out(const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim, Tensor & out); // {"schema": "aten::norm.names_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +::std::tuple frexp(const Tensor & self); // {"schema": "aten::frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent)", "dispatch": "True", "default": "True"} +::std::tuple frexp_out(const Tensor & self, Tensor & mantissa, Tensor & exponent); // {"schema": "aten::frexp.Tensor_out(Tensor self, *, Tensor(a!) mantissa, Tensor(b!) exponent) -> (Tensor(a!) mantissa, Tensor(b!) exponent)", "dispatch": "True", "default": "False"} +Tensor frobenius_norm(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & frobenius_norm_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::frobenius_norm.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nuclear_norm(const Tensor & self, bool keepdim); // {"schema": "aten::nuclear_norm(Tensor self, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nuclear_norm_out(const Tensor & self, bool keepdim, Tensor & out); // {"schema": "aten::nuclear_norm.out(Tensor self, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nuclear_norm(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::nuclear_norm.dim(Tensor self, int[2] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nuclear_norm_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor clone(const Tensor & self, c10::optional memory_format); // {"schema": "aten::clone(Tensor self, *, MemoryFormat? 
memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor positive(const Tensor & self); // {"schema": "aten::positive(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +const Tensor & resize_as_(const Tensor & self, const Tensor & the_template, c10::optional memory_format); // {"schema": "aten::resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_as_sparse_(const Tensor & self, const Tensor & the_template); // {"schema": "aten::resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & zero_(Tensor & self); // {"schema": "aten::zero_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & sub_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sub(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sub(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & subtract_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor subtract(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::subtract.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & subtract_(Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::subtract_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor subtract(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::subtract.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & subtract_(Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::subtract_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor rsub(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & heaviside_out(const Tensor & self, const Tensor & values, Tensor & out); // {"schema": "aten::heaviside.out(Tensor self, Tensor values, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor heaviside(const Tensor & self, const Tensor & values); // {"schema": "aten::heaviside(Tensor self, Tensor values) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & heaviside_(Tensor & self, const Tensor & values); // {"schema": "aten::heaviside_(Tensor(a!) self, Tensor values) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor rsub(const Tensor & self, const Scalar & other, const Scalar & alpha); // {"schema": "aten::rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _sparse_addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::_sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sparse_sampled_addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sparse_sampled_addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _sparse_mm_reduce_impl(const Tensor & self, const Tensor & other, c10::string_view reduce); // {"schema": "aten::_sparse_mm_reduce_impl(Tensor self, Tensor other, str reduce) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _sparse_mm_reduce_impl_backward(const Tensor & self, const Tensor & grad_out, const Tensor & weight, c10::string_view reduce, const Tensor & arg_out, ::std::array output_mask); // {"schema": "aten::_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addmm_(Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _addmm_activation_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, bool use_gelu, Tensor & out); // {"schema": "aten::_addmm_activation.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _addmm_activation(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, bool use_gelu); // {"schema": "aten::_addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple _scaled_mm(const Tensor & self, const Tensor & mat2, const c10::optional & bias, c10::optional out_dtype, const c10::optional & scale_a, const c10::optional & scale_b, const c10::optional & scale_result, bool use_fast_accum); // {"schema": "aten::_scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _scaled_mm_out(const Tensor & self, const Tensor & mat2, const c10::optional & bias, c10::optional out_dtype, const c10::optional & scale_a, const c10::optional & scale_b, const c10::optional & scale_result, bool use_fast_accum, Tensor & out, Tensor & out_amax); // {"schema": "aten::_scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +Tensor sparse_compressed_tensor(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_csr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_csc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_compressed_tensor(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_csr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_csc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsr_tensor(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_bsc_tensor(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_compressed_tensor_unsafe(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_csr_tensor_unsafe(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_csc_tensor_unsafe(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_bsr_tensor_unsafe(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_bsc_tensor_unsafe(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_coo_tensor(IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::sparse_coo_tensor.size(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional is_coalesced); // {"schema": "aten::sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional is_coalesced); // {"schema": "aten::sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? 
is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _sparse_coo_tensor_unsafe(const Tensor & indices, const Tensor & values, c10::SymIntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional is_coalesced); // {"schema": "aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor", "dispatch": "False", "default": "True"} +void _validate_sparse_coo_tensor_args(const Tensor & indices, const Tensor & values, IntArrayRef size, c10::optional is_coalesced); // {"schema": "aten::_validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_compressed_tensor_args(const Tensor & compressed_indices, const Tensor & plain_indices, const Tensor & values, IntArrayRef size, Layout layout); // {"schema": "aten::_validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_csr_tensor_args(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_csc_tensor_args(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_bsr_tensor_args(const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +void _validate_sparse_bsc_tensor_args(const Tensor & ccol_indices, const Tensor & row_indices, const Tensor & values, IntArrayRef size); // {"schema": "aten::_validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()", "dispatch": "False", "default": "True"} +Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory); // {"schema": "aten::_sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const Tensor & indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional is_coalesced); // {"schema": "aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? 
is_coalesced=None) -> Tensor", "dispatch": "True", "default": "False"} +const Tensor & sparse_resize_(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)", "dispatch": "True", "default": "False"} +const Tensor & sparse_resize_and_clear_(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor sparse_mask(const Tensor & self, const Tensor & mask); // {"schema": "aten::sparse_mask(Tensor self, Tensor mask) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _sparse_mask_projection(const Tensor & self, const Tensor & mask, bool accumulate_matches); // {"schema": "aten::_sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector _to_cpu(TensorList tensors); // {"schema": "aten::_to_cpu(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor to_dense(const Tensor & self, c10::optional dtype, c10::optional masked_grad); // {"schema": "aten::to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_dense(const Tensor & self, c10::optional dtype, c10::optional masked_grad); // {"schema": "aten::_to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_dense_backward(const Tensor & grad, const Tensor & input, c10::optional masked_grad); // {"schema": "aten::to_dense_backward(Tensor grad, Tensor input, bool? masked_grad=None) -> Tensor", "dispatch": "False", "default": "True"} +int64_t sparse_dim(const Tensor & self); // {"schema": "aten::sparse_dim(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t _dimI(const Tensor & self); // {"schema": "aten::_dimI(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t dense_dim(const Tensor & self); // {"schema": "aten::dense_dim(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t _dimV(const Tensor & self); // {"schema": "aten::_dimV(Tensor self) -> int", "dispatch": "True", "default": "False"} +int64_t _nnz(const Tensor & self); // {"schema": "aten::_nnz(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor coalesce(const Tensor & self); // {"schema": "aten::coalesce(Tensor(a) self) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _coalesce(const Tensor & self); // {"schema": "aten::_coalesce(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +bool is_coalesced(const Tensor & self); // {"schema": "aten::is_coalesced(Tensor self) -> bool", "dispatch": "True", "default": "True"} +Tensor _indices(const Tensor & self); // {"schema": "aten::_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor _values(const Tensor & self); // {"schema": "aten::_values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "False"} +Tensor & _coalesced_(Tensor & self, bool coalesced); // {"schema": "aten::_coalesced_(Tensor(a!) 
self, bool coalesced) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor indices(const Tensor & self); // {"schema": "aten::indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor values(const Tensor & self); // {"schema": "aten::values(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor crow_indices(const Tensor & self); // {"schema": "aten::crow_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor col_indices(const Tensor & self); // {"schema": "aten::col_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor ccol_indices(const Tensor & self); // {"schema": "aten::ccol_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor row_indices(const Tensor & self); // {"schema": "aten::row_indices(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"} +Tensor & hspmm_out(const Tensor & mat1, const Tensor & mat2, Tensor & out); // {"schema": "aten::hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hspmm(const Tensor & mat1, const Tensor & mat2); // {"schema": "aten::hspmm(Tensor mat1, Tensor mat2) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & copy_sparse_to_sparse_(Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_sparse_to_sparse_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::vector unbind(const Tensor & self, int64_t dim); // {"schema": "aten::unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]", "dispatch": "True", "default": "True"} +::std::vector unbind(const Tensor & self, Dimname dim); // {"schema": "aten::unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]", "dispatch": "False", "default": "True"} +Tensor to_sparse(const Tensor & self, int64_t sparse_dim); // {"schema": "aten::to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse(const Tensor & self, int64_t sparse_dim); // {"schema": "aten::_to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse(const Tensor & self, c10::optional layout, OptionalIntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse(const Tensor & self, c10::optional layout, OptionalIntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::_to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_csr(const Tensor & self, c10::optional dense_dim); // {"schema": "aten::to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_csr(const Tensor & self, c10::optional dense_dim); // {"schema": "aten::_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_csc(const Tensor & self, c10::optional dense_dim); // {"schema": "aten::to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_csc(const Tensor & self, c10::optional dense_dim); // {"schema": "aten::_to_sparse_csc(Tensor self, int? 
dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_bsr(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_bsr(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::_to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_sparse_bsc(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _to_sparse_bsc(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim); // {"schema": "aten::_to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _to_sparse_semi_structured(const Tensor & dense); // {"schema": "aten::_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor to_mkldnn(const Tensor & self, c10::optional dtype); // {"schema": "aten::to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_reorder_conv2d_weight(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size); // {"schema": "aten::mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor mkldnn_reorder_conv3d_weight(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups); // {"schema": "aten::mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor to_mkldnn_backward(const Tensor & grad, const Tensor & input); // {"schema": "aten::to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantize_per_tensor_dynamic(const Tensor & self, ScalarType dtype, bool reduce_range); // {"schema": "aten::quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantize_per_tensor(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype); // {"schema": "aten::quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +Tensor quantize_per_tensor(const Tensor & self, const Tensor & scale, const Tensor & zero_point, ScalarType dtype); // {"schema": "aten::quantize_per_tensor.tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector quantize_per_tensor(TensorList tensors, const Tensor & scales, const Tensor & zero_points, ScalarType dtype); // {"schema": "aten::quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]", "dispatch": "True", "default": "False"} +Tensor quantize_per_channel(const Tensor & self, const Tensor & scales, const Tensor & zero_points, int64_t axis, ScalarType 
dtype); // {"schema": "aten::quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "False"} +Tensor dequantize(const Tensor & self); // {"schema": "aten::dequantize.self(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector dequantize(TensorList tensors); // {"schema": "aten::dequantize.tensors(Tensor[] tensors) -> Tensor[]", "dispatch": "True", "default": "False"} +double q_scale(const Tensor & self); // {"schema": "aten::q_scale(Tensor self) -> float", "dispatch": "True", "default": "False"} +int64_t q_zero_point(const Tensor & self); // {"schema": "aten::q_zero_point(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor q_per_channel_scales(const Tensor & self); // {"schema": "aten::q_per_channel_scales(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor q_per_channel_zero_points(const Tensor & self); // {"schema": "aten::q_per_channel_zero_points(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +int64_t q_per_channel_axis(const Tensor & self); // {"schema": "aten::q_per_channel_axis(Tensor self) -> int", "dispatch": "True", "default": "False"} +Tensor int_repr(const Tensor & self); // {"schema": "aten::int_repr(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _make_per_tensor_quantized_tensor(const Tensor & self, double scale, int64_t zero_point); // {"schema": "aten::_make_per_tensor_quantized_tensor(Tensor self, float scale, int zero_point) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _make_per_channel_quantized_tensor(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis); // {"schema": "aten::_make_per_channel_quantized_tensor(Tensor self, Tensor scale, Tensor zero_point, int axis) -> Tensor", "dispatch": "True", "default": "False"} +QScheme qscheme(const Tensor & self); // {"schema": "aten::qscheme(Tensor self) -> QScheme", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_tensor_affine(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fake_quantize_per_tensor_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine.tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple fake_quantize_per_tensor_affine_cachemask(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_per_tensor_affine_cachemask_tensor_qparams(const Tensor & self, const Tensor & scale, const Tensor & zero_point, const Tensor & fake_quant_enabled, int64_t quant_min, int64_t quant_max); // {"schema": "aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +Tensor 
fake_quantize_per_tensor_affine_cachemask_backward(const Tensor & grad, const Tensor & mask); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _fake_quantize_learnable_per_tensor_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_learnable_per_tensor_affine_backward(const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_channel_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple fake_quantize_per_channel_affine_cachemask(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +Tensor fake_quantize_per_channel_affine_cachemask_backward(const Tensor & grad, const Tensor & mask); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _fake_quantize_learnable_per_channel_affine(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _fake_quantize_learnable_per_channel_affine_backward(const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor fused_moving_avg_obs_fake_quant(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::fused_moving_avg_obs_fake_quant(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) 
zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple _fused_moving_avg_obs_fq_helper(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::_fused_moving_avg_obs_fq_helper(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> (Tensor output, Tensor mask)", "dispatch": "True", "default": "False"} +::std::tuple _choose_qparams_per_tensor(const Tensor & self, bool reduce_range); // {"schema": "aten::_choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)", "dispatch": "False", "default": "True"} +Tensor _saturate_weight_to_fp16(const Tensor & weight); // {"schema": "aten::_saturate_weight_to_fp16(Tensor weight) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple choose_qparams_optimized(const Tensor & input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width); // {"schema": "aten::choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor _autocast_to_reduced_precision(const Tensor & self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype); // {"schema": "aten::_autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _autocast_to_full_precision(const Tensor & self, bool cuda_enabled, bool cpu_enabled); // {"schema": "aten::_autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor _to_copy(const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, bool non_blocking, c10::optional memory_format); // {"schema": "aten::_to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor to(const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, bool non_blocking, bool copy, c10::optional memory_format); // {"schema": "aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, Device device, ScalarType dtype, bool non_blocking, bool copy, c10::optional memory_format); // {"schema": "aten::to.device(Tensor(a) self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? 
memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, ScalarType dtype, bool non_blocking, bool copy, c10::optional memory_format); // {"schema": "aten::to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +Tensor to(const Tensor & self, const Tensor & other, bool non_blocking, bool copy, c10::optional memory_format); // {"schema": "aten::to.other(Tensor(a) self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)", "dispatch": "False", "default": "True"} +::std::vector meshgrid(TensorList tensors); // {"schema": "aten::meshgrid(Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"} +::std::vector meshgrid(TensorList tensors, c10::string_view indexing); // {"schema": "aten::meshgrid.indexing(Tensor[] tensors, *, str indexing) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor cartesian_prod(TensorList tensors); // {"schema": "aten::cartesian_prod(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor combinations(const Tensor & self, int64_t r, bool with_replacement); // {"schema": "aten::combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor", "dispatch": "False", "default": "True"} +Scalar item(const Tensor & self); // {"schema": "aten::item(Tensor self) -> Scalar", "dispatch": "False", "default": "True"} +ScalarType result_type(const Tensor & tensor, const Tensor & other); // {"schema": "aten::result_type.Tensor(Tensor tensor, Tensor other) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Tensor & tensor, const Scalar & other); // {"schema": "aten::result_type.Scalar(Tensor tensor, Scalar other) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Scalar & scalar, const Tensor & tensor); // {"schema": "aten::result_type.Scalar_Tensor(Scalar scalar, Tensor tensor) -> ScalarType", "dispatch": "False", "default": "True"} +ScalarType result_type(const Scalar & scalar1, const Scalar & scalar2); // {"schema": "aten::result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType", "dispatch": "False", "default": "True"} +bool can_cast(ScalarType from, ScalarType to); // {"schema": "aten::can_cast(ScalarType from, ScalarType to) -> bool", "dispatch": "False", "default": "True"} +ScalarType promote_types(ScalarType type1, ScalarType type2); // {"schema": "aten::promote_types(ScalarType type1, ScalarType type2) -> ScalarType", "dispatch": "False", "default": "True"} +Scalar _local_scalar_dense(const Tensor & self); // {"schema": "aten::_local_scalar_dense(Tensor self) -> Scalar", "dispatch": "True", "default": "False"} +::std::tuple _lstm_mps(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::_lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple,::std::vector> lstm_mps_backward(const c10::optional & grad_y, const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & z_state, const Tensor & cell_state_fwd, const Tensor & input, const Tensor & layersOutputs, TensorList hx, TensorList 
params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & cx, const c10::optional & input_bias, const c10::optional & hidden_bias); // {"schema": "aten::_thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell_backward_impl(const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_lstm_cell_backward(const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _thnn_differentiable_lstm_cell_backward(const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & input_gates, const Tensor & hidden_gates, const c10::optional & input_bias, const c10::optional & hidden_bias, const Tensor & cx, const Tensor & cy); // {"schema": "aten::_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple _thnn_fused_gru_cell(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const c10::optional & input_bias, const c10::optional & hidden_bias); // {"schema": "aten::_thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_fused_gru_cell_backward(const Tensor & grad_hy, const Tensor & workspace, bool has_bias); // {"schema": "aten::_thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"} +::std::tuple _thnn_differentiable_gru_cell_backward(const Tensor & grad_hy, const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const c10::optional & input_bias, const c10::optional & hidden_bias); // {"schema": "aten::_thnn_differentiable_gru_cell_backward(Tensor grad_hy, Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias, Tensor? 
hidden_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm(const Tensor & data, const Tensor & batch_sizes, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple gru(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple gru(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_tanh(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::rnn_tanh.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_tanh(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::rnn_tanh.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_relu(const Tensor & input, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first); // {"schema": "aten::rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple rnn_relu(const Tensor & data, const Tensor & batch_sizes, const Tensor & hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional); // {"schema": "aten::rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +::std::tuple lstm_cell(const Tensor & 
input, TensorList hx, const Tensor & w_ih, const Tensor & w_hh, const c10::optional & b_ih, const c10::optional & b_hh); // {"schema": "aten::lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor gru_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const c10::optional & b_ih, const c10::optional & b_hh); // {"schema": "aten::gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rnn_tanh_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const c10::optional & b_ih, const c10::optional & b_hh); // {"schema": "aten::rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor rnn_relu_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const c10::optional & b_ih, const c10::optional & b_hh); // {"schema": "aten::rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple quantized_lstm_cell(const Tensor & input, TensorList hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"} +Tensor quantized_gru_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantized_rnn_relu_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"} +Tensor quantized_rnn_tanh_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const 
+Tensor quantized_rnn_tanh_cell(const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh); // {"schema": "aten::quantized_rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> _pack_padded_sequence(const Tensor & input, const Tensor & lengths, bool batch_first); // {"schema": "aten::_pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"}
+Tensor _pack_padded_sequence_backward(const Tensor & grad, c10::SymIntArrayRef input_size, const Tensor & batch_sizes, bool batch_first); // {"schema": "aten::_pack_padded_sequence_backward(Tensor grad, SymInt[] input_size, Tensor batch_sizes, bool batch_first) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> _pad_packed_sequence(const Tensor & data, const Tensor & batch_sizes, bool batch_first, const Scalar & padding_value, int64_t total_length); // {"schema": "aten::_pad_packed_sequence(Tensor data, Tensor batch_sizes, bool batch_first, Scalar padding_value, int total_length) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+Tensor & set_(Tensor & self, Storage source); // {"schema": "aten::set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & set_(Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & set_(Tensor & self, const Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & set_(Tensor & self, const Tensor & source); // {"schema": "aten::set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & set_(Tensor & self); // {"schema": "aten::set_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor lift(const Tensor & self); // {"schema": "aten::lift(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor lift_fresh(const Tensor & self); // {"schema": "aten::lift_fresh(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor lift_fresh_copy(const Tensor & self); // {"schema": "aten::lift_fresh_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+bool is_set_to(const Tensor & self, const Tensor & tensor); // {"schema": "aten::is_set_to(Tensor self, Tensor tensor) -> bool", "dispatch": "True", "default": "False"}
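// A sketch (editor's illustration) of the internal packing op declared above. Assumption:
// as with the public pack_padded_sequence contract, lengths is a CPU int64 tensor sorted
// in decreasing order.
void pack_sketch() {
  auto padded  = torch::randn({5, 3, 8});                 // (T, B, features)
  auto lengths = torch::tensor({5, 4, 2}, torch::kLong);  // per-sequence lengths
  auto [data, batch_sizes] = at::_pack_padded_sequence(padded, lengths, /*batch_first=*/false);
  // data: (sum(lengths), features) flattened steps; batch_sizes: active batch per timestep
}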
+Tensor & masked_fill_(Tensor & self, const Tensor & mask, const Scalar & value); // {"schema": "aten::masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor masked_fill(const Tensor & self, const Tensor & mask, const Scalar & value); // {"schema": "aten::masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & masked_fill_(Tensor & self, const Tensor & mask, const Tensor & value); // {"schema": "aten::masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor masked_fill(const Tensor & self, const Tensor & mask, const Tensor & value); // {"schema": "aten::masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & masked_scatter_(Tensor & self, const Tensor & mask, const Tensor & source); // {"schema": "aten::masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor masked_scatter(const Tensor & self, const Tensor & mask, const Tensor & source); // {"schema": "aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor masked_scatter_backward(const Tensor & grad_output, const Tensor & mask, c10::SymIntArrayRef sizes); // {"schema": "aten::masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _masked_softmax(const Tensor & self, const Tensor & mask, c10::optional<int64_t> dim, c10::optional<int64_t> mask_type); // {"schema": "aten::_masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _masked_softmax_backward(const Tensor & grad_output, const Tensor & output, const Tensor & mask, c10::optional<int64_t> dim); // {"schema": "aten::_masked_softmax_backward(Tensor grad_output, Tensor output, Tensor mask, int? dim=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor view(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a)", "dispatch": "True", "default": "False"}
+Tensor view(const Tensor & self, ScalarType dtype); // {"schema": "aten::view.dtype(Tensor(a) self, ScalarType dtype) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor & put_(Tensor & self, const Tensor & index, const Tensor & source, bool accumulate); // {"schema": "aten::put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor put(const Tensor & self, const Tensor & index, const Tensor & source, bool accumulate); // {"schema": "aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & index_add_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha, Tensor & out); // {"schema": "aten::index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & index_add_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add_(Tensor(a!) 
self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_add(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor index_add(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & source, const Scalar & alpha); // {"schema": "aten::index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & index_reduce_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self, Tensor & out); // {"schema": "aten::index_reduce.out(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & index_reduce_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self); // {"schema": "aten::index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor index_reduce(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, c10::string_view reduce, bool include_self); // {"schema": "aten::index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_fill(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_fill(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & index_fill_(Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & index_fill_(Tensor & self, Dimname dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill_.Dimname_Tensor(Tensor(a!) 
self, Dimname dim, Tensor index, Tensor value) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor index_fill(const Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::index_fill.Dimname_Scalar(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor index_fill(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & value); // {"schema": "aten::index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, Tensor & out); // {"schema": "aten::scatter.src_out(Tensor self, int dim, Tensor index, Tensor src, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, Tensor & out); // {"schema": "aten::scatter.value_out(Tensor self, int dim, Tensor index, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce); // {"schema": "aten::scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce); // {"schema": "aten::scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, Tensor & out); // {"schema": "aten::scatter.reduce_out(Tensor self, int dim, Tensor index, Tensor src, *, str reduce, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce); // {"schema": "aten::scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_(Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce); // {"schema": "aten::scatter_.value_reduce(Tensor(a!) 
self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce, Tensor & out); // {"schema": "aten::scatter.value_reduce_out(Tensor self, int dim, Tensor index, Scalar value, *, str reduce, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter(const Tensor & self, Dimname dim, const Tensor & index, const Scalar & value); // {"schema": "aten::scatter.dimname_value(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter_add(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_add_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_add_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, Tensor & out); // {"schema": "aten::scatter_add.out(Tensor self, int dim, Tensor index, Tensor src, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor scatter_add(const Tensor & self, Dimname dim, const Tensor & index, const Tensor & src); // {"schema": "aten::scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor", "dispatch": "False", "default": "True"} +Tensor scatter_reduce(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self); // {"schema": "aten::scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & scatter_reduce_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self); // {"schema": "aten::scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & scatter_reduce_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce, bool include_self, Tensor & out); // {"schema": "aten::scatter_reduce.two_out(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & eq_(Tensor & self, const Scalar & other); // {"schema": "aten::eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & eq_(Tensor & self, const Tensor & other); // {"schema": "aten::eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_and_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_and(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __and__(const Tensor & self, const Scalar & other); // {"schema": "aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __and__(const Tensor & self, const Tensor & other); // {"schema": "aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __iand__(Tensor & self, const Scalar & other); // {"schema": "aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __iand__(Tensor & self, const Tensor & other); // {"schema": "aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & bitwise_or_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_or.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_or_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_or(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_or_.Tensor(Tensor(a!) 
self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __or__(const Tensor & self, const Scalar & other); // {"schema": "aten::__or__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __or__(const Tensor & self, const Tensor & other); // {"schema": "aten::__or__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __ior__(Tensor & self, const Scalar & other); // {"schema": "aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __ior__(Tensor & self, const Tensor & other); // {"schema": "aten::__ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & bitwise_xor_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & bitwise_xor_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor bitwise_xor(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor __xor__(const Tensor & self, const Scalar & other); // {"schema": "aten::__xor__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor __xor__(const Tensor & self, const Tensor & other); // {"schema": "aten::__xor__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & __ixor__(Tensor & self, const Scalar & other); // {"schema": "aten::__ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & __ixor__(Tensor & self, const Tensor & other); // {"schema": "aten::__ixor__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor __lshift__(const Tensor & self, const Scalar & other); // {"schema": "aten::__lshift__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor __lshift__(const Tensor & self, const Tensor & other); // {"schema": "aten::__lshift__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & __ilshift__(Tensor & self, const Scalar & other); // {"schema": "aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & __ilshift__(Tensor & self, const Tensor & other); // {"schema": "aten::__ilshift__.Tensor(Tensor(a!) 
self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_left_shift(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_left_shift(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bitwise_left_shift(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_left_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor __rshift__(const Tensor & self, const Scalar & other); // {"schema": "aten::__rshift__.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor __rshift__(const Tensor & self, const Tensor & other); // {"schema": "aten::__rshift__.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & __irshift__(Tensor & self, const Scalar & other); // {"schema": "aten::__irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & __irshift__(Tensor & self, const Tensor & other); // {"schema": "aten::__irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_right_shift(const Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_(Tensor & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bitwise_right_shift(const Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_(Tensor & self, const Scalar & other); // {"schema": "aten::bitwise_right_shift_.Tensor_Scalar(Tensor(a!) 
self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & bitwise_right_shift_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor bitwise_right_shift(const Scalar & self, const Tensor & other); // {"schema": "aten::bitwise_right_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & tril_(Tensor & self, int64_t diagonal); // {"schema": "aten::tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & triu_(Tensor & self, int64_t diagonal); // {"schema": "aten::triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & digamma_(Tensor & self); // {"schema": "aten::digamma_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & lerp_(Tensor & self, const Tensor & end, const Scalar & weight); // {"schema": "aten::lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & lerp_(Tensor & self, const Tensor & end, const Tensor & weight); // {"schema": "aten::lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & addbmm_(Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & addbmm_out(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor addbmm(const Tensor & self, const Tensor & batch1, const Tensor & batch2, const Scalar & beta, const Scalar & alpha); // {"schema": "aten::addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & random_(Tensor & self, int64_t from, c10::optional<int64_t> to, c10::optional<Generator> generator); // {"schema": "aten::random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & random_(Tensor & self, int64_t to, c10::optional<Generator> generator); // {"schema": "aten::random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & random_(Tensor & self, c10::optional<Generator> generator); // {"schema": "aten::random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & uniform_(Tensor & self, double from, double to, c10::optional<Generator> generator); // {"schema": "aten::uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
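// The trailing underscore marks in-place mutation: each of these fills self by sampling
// from the named distribution, and passing c10::nullopt for the generator uses the default
// RNG. A small sketch (editor's illustration) via the Tensor method API:
void rng_sketch() {
  auto t = torch::empty({2, 3});
  t.uniform_(0.0, 1.0);  // aten::uniform_: U[0, 1)
  t.random_(0, 10);      // aten::random_.from: integers in [0, 10) cast to t's dtype
}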
+Tensor & cauchy_(Tensor & self, double median, double sigma, c10::optional<Generator> generator); // {"schema": "aten::cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & log_normal_(Tensor & self, double mean, double std, c10::optional<Generator> generator); // {"schema": "aten::log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & exponential_(Tensor & self, double lambd, c10::optional<Generator> generator); // {"schema": "aten::exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & geometric_(Tensor & self, double p, c10::optional<Generator> generator); // {"schema": "aten::geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & diag_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor diag(const Tensor & self, int64_t diagonal); // {"schema": "aten::diag(Tensor self, int diagonal=0) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & cross_out(const Tensor & self, const Tensor & other, c10::optional<int64_t> dim, Tensor & out); // {"schema": "aten::cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor cross(const Tensor & self, const Tensor & other, c10::optional<int64_t> dim); // {"schema": "aten::cross(Tensor self, Tensor other, int? dim=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & triu_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor triu(const Tensor & self, int64_t diagonal); // {"schema": "aten::triu(Tensor self, int diagonal=0) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & tril_out(const Tensor & self, int64_t diagonal, Tensor & out); // {"schema": "aten::tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor tril(const Tensor & self, int64_t diagonal); // {"schema": "aten::tril(Tensor self, int diagonal=0) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor tril_indices(int64_t row, int64_t col, int64_t offset, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor triu_indices(int64_t row, int64_t col, int64_t offset, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor trace(const Tensor & self); // {"schema": "aten::trace(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor trace_backward(const Tensor & grad, c10::SymIntArrayRef sizes); // {"schema": "aten::trace_backward(Tensor grad, SymInt[] sizes) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & ne_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::ne.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ne(const Tensor & self, const Scalar & other); // {"schema": "aten::ne.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ne_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ne(const Tensor & self, const Tensor & other); // {"schema": "aten::ne.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ne_(Tensor & self, const Scalar & other); // {"schema": "aten::ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ne_(Tensor & self, const Tensor & other); // {"schema": "aten::ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & not_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::not_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor not_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::not_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & not_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::not_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor not_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::not_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & not_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::not_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & not_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::not_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & eq_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor eq(const Tensor & self, const Scalar & other); // {"schema": "aten::eq.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & eq_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor eq(const Tensor & self, const Tensor & other); // {"schema": "aten::eq.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ge(const Tensor & self, const Scalar & other); // {"schema": "aten::ge.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ge(const Tensor & self, const Tensor & other); // {"schema": "aten::ge.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & ge_(Tensor & self, const Scalar & other); // {"schema": "aten::ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ge_(Tensor & self, const Tensor & other); // {"schema": "aten::ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & greater_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::greater_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::greater_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::greater_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::greater_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::greater_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & greater_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::greater_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & le_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor le(const Tensor & self, const Scalar & other); // {"schema": "aten::le.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & le_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor le(const Tensor & self, const Tensor & other); // {"schema": "aten::le.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & le_(Tensor & self, const Scalar & other); // {"schema": "aten::le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & le_(Tensor & self, const Tensor & other); // {"schema": "aten::le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & less_equal_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::less_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less_equal(const Tensor & self, const Scalar & other); // {"schema": "aten::less_equal.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_equal_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::less_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less_equal(const Tensor & self, const Tensor & other); // {"schema": "aten::less_equal.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_equal_(Tensor & self, const Scalar & other); // {"schema": "aten::less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & less_equal_(Tensor & self, const Tensor & other); // {"schema": "aten::less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & gt_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gt(const Tensor & self, const Scalar & other); // {"schema": "aten::gt.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gt_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gt(const Tensor & self, const Tensor & other); // {"schema": "aten::gt.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & gt_(Tensor & self, const Scalar & other); // {"schema": "aten::gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & gt_(Tensor & self, const Tensor & other); // {"schema": "aten::gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & greater_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::greater.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater(const Tensor & self, const Scalar & other); // {"schema": "aten::greater.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::greater.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor greater(const Tensor & self, const Tensor & other); // {"schema": "aten::greater.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & greater_(Tensor & self, const Scalar & other); // {"schema": "aten::greater_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & greater_(Tensor & self, const Tensor & other); // {"schema": "aten::greater_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & lt_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lt(const Tensor & self, const Scalar & other); // {"schema": "aten::lt.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lt_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor lt(const Tensor & self, const Tensor & other); // {"schema": "aten::lt.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lt_(Tensor & self, const Scalar & other); // {"schema": "aten::lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lt_(Tensor & self, const Tensor & other); // {"schema": "aten::lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & less_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less(const Tensor & self, const Scalar & other); // {"schema": "aten::less.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::less.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor less(const Tensor & self, const Tensor & other); // {"schema": "aten::less.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & less_(Tensor & self, const Scalar & other); // {"schema": "aten::less_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & less_(Tensor & self, const Tensor & other); // {"schema": "aten::less_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & take_out(const Tensor & self, const Tensor & index, Tensor & out); // {"schema": "aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor take(const Tensor & self, const Tensor & index); // {"schema": "aten::take(Tensor self, Tensor index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & take_along_dim_out(const Tensor & self, const Tensor & indices, c10::optional dim, Tensor & out); // {"schema": "aten::take_along_dim.out(Tensor self, Tensor indices, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor take_along_dim(const Tensor & self, const Tensor & indices, c10::optional dim); // {"schema": "aten::take_along_dim(Tensor self, Tensor indices, int? dim=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & index_select_out(const Tensor & self, int64_t dim, const Tensor & index, Tensor & out); // {"schema": "aten::index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor index_select(const Tensor & self, int64_t dim, const Tensor & index); // {"schema": "aten::index_select(Tensor self, int dim, Tensor index) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & index_select_out(const Tensor & self, Dimname dim, const Tensor & index, Tensor & out); // {"schema": "aten::index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor index_select(const Tensor & self, Dimname dim, const Tensor & index); // {"schema": "aten::index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor", "dispatch": "False", "default": "True"} +Tensor index_select_backward(const Tensor & grad, c10::SymIntArrayRef self_sizes, int64_t dim, const Tensor & index); // {"schema": "aten::index_select_backward(Tensor grad, SymInt[] self_sizes, int dim, Tensor index) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & masked_select_out(const Tensor & self, const Tensor & mask, Tensor & out); // {"schema": "aten::masked_select.out(Tensor self, Tensor mask, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor masked_select(const Tensor & self, const Tensor & mask); // {"schema": "aten::masked_select(Tensor self, Tensor mask) -> Tensor", "dispatch": "True", "default": "False"} +Tensor masked_select_backward(const Tensor & grad, const Tensor & input, const Tensor & mask); // {"schema": "aten::masked_select_backward(Tensor grad, Tensor input, Tensor mask) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & nonzero_out(const Tensor & self, Tensor & out); // {"schema": "aten::nonzero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nonzero(const Tensor & self); // {"schema": "aten::nonzero(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & nonzero_static_out(const Tensor & self, int64_t size, int64_t fill_value, Tensor & out); // {"schema": "aten::nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nonzero_static(const Tensor & self, int64_t size, int64_t fill_value); // {"schema": "aten::nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor", "dispatch": "True", "default": "False"} +::std::vector nonzero_numpy(const Tensor & self); // {"schema": "aten::nonzero_numpy(Tensor self) -> Tensor[]", "dispatch": "False", "default": "True"} +Tensor argwhere(const Tensor & self); // {"schema": "aten::argwhere(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & gather_out(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad, Tensor & out); // {"schema": "aten::gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor gather(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor gather_backward(const Tensor & grad, const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather_backward(Tensor grad, Tensor self, int dim, Tensor index, bool sparse_grad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & gather_out(const Tensor & self, Dimname dim, const Tensor & index, bool sparse_grad, Tensor & out); // {"schema": "aten::gather.dimname_out(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor gather(const Tensor & self, Dimname dim, const Tensor & index, bool sparse_grad); // {"schema": "aten::gather.dimname(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _gather_sparse_backward(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & grad); // {"schema": "aten::_gather_sparse_backward(Tensor self, int dim, Tensor index, Tensor grad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & addcmul_out(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value, Tensor & out); // {"schema": "aten::addcmul.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addcmul(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addcmul_(Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & addcdiv_out(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value, Tensor & out); // {"schema": "aten::addcdiv.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor addcdiv(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & addcdiv_(Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value); // {"schema": "aten::addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor cross_entropy_loss(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, double label_smoothing); // {"schema": "aten::cross_entropy_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, float label_smoothing=0.0) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple triangular_solve_out(const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular, Tensor & X, Tensor & M); // {"schema": "aten::triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) 
+::std::tuple<Tensor &,Tensor &> triangular_solve_out(const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular, Tensor & X, Tensor & M); // {"schema": "aten::triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> triangular_solve(const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular); // {"schema": "aten::triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)", "dispatch": "True", "default": "True"}
+void _linalg_check_errors(const Tensor & info, c10::string_view api_name, bool is_matrix); // {"schema": "aten::_linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> ()", "dispatch": "True", "default": "True"}
+Tensor & linalg_solve_triangular_out(const Tensor & self, const Tensor & B, bool upper, bool left, bool unitriangular, Tensor & out); // {"schema": "aten::linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor linalg_solve_triangular(const Tensor & self, const Tensor & B, bool upper, bool left, bool unitriangular); // {"schema": "aten::linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor linalg_vander(const Tensor & x, c10::optional<c10::SymInt> N); // {"schema": "aten::linalg_vander(Tensor x, *, SymInt? N=None) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> svd_out(const Tensor & self, bool some, bool compute_uv, Tensor & U, Tensor & S, Tensor & V); // {"schema": "aten::svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> svd(const Tensor & self, bool some, bool compute_uv); // {"schema": "aten::svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)", "dispatch": "False", "default": "True"}
+Tensor swapaxes(const Tensor & self, int64_t axis0, int64_t axis1); // {"schema": "aten::swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor & swapaxes_(Tensor & self, int64_t axis0, int64_t axis1); // {"schema": "aten::swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor swapdims(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a)", "dispatch": "False", "default": "True"}
+Tensor & swapdims_(Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & cholesky_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor cholesky(const Tensor & self, bool upper); // {"schema": "aten::cholesky(Tensor self, bool upper=False) -> Tensor", "dispatch": "True", "default": "False"}
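// linalg_solve_triangular solves A X = B (with left=true), reading only the triangle of A
// selected by upper. A sketch (editor's illustration) with an assumed well-conditioned
// upper-triangular A:
void solve_triangular_sketch() {
  auto A = torch::triu(torch::randn({3, 3})) + 3.0 * torch::eye(3);  // nonzero diagonal
  auto B = torch::randn({3, 2});
  auto X = at::linalg_solve_triangular(A, B, /*upper=*/true, /*left=*/true,
                                       /*unitriangular=*/false);
  // A.matmul(X) reconstructs B up to floating-point error
}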
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor cholesky_solve(const Tensor & self, const Tensor & input2, bool upper); // {"schema": "aten::cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _cholesky_solve_helper(const Tensor & self, const Tensor & A, bool upper); // {"schema": "aten::_cholesky_solve_helper(Tensor self, Tensor A, bool upper) -> Tensor", "dispatch": "True", "default": "False"} +Tensor cholesky_inverse(const Tensor & self, bool upper); // {"schema": "aten::cholesky_inverse(Tensor self, bool upper=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & cholesky_inverse_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::cholesky_inverse.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +::std::tuple qr_out(const Tensor & self, bool some, Tensor & Q, Tensor & R); // {"schema": "aten::qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)", "dispatch": "False", "default": "True"} +::std::tuple qr(const Tensor & self, bool some); // {"schema": "aten::qr(Tensor self, bool some=True) -> (Tensor Q, Tensor R)", "dispatch": "False", "default": "True"} +::std::tuple geqrf_out(const Tensor & self, Tensor & a, Tensor & tau); // {"schema": "aten::geqrf.a(Tensor self, *, Tensor(a!) a, Tensor(b!) tau) -> (Tensor(a!) a, Tensor(b!) tau)", "dispatch": "True", "default": "False"} +::std::tuple geqrf(const Tensor & self); // {"schema": "aten::geqrf(Tensor self) -> (Tensor a, Tensor tau)", "dispatch": "True", "default": "False"} +Tensor orgqr(const Tensor & self, const Tensor & input2); // {"schema": "aten::orgqr(Tensor self, Tensor input2) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & orgqr_out(const Tensor & self, const Tensor & input2, Tensor & out); // {"schema": "aten::orgqr.out(Tensor self, Tensor input2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & ormqr_out(const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose, Tensor & out); // {"schema": "aten::ormqr.out(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor ormqr(const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose); // {"schema": "aten::ormqr(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _lu_with_info(const Tensor & self, bool pivot, bool check_errors); // {"schema": "aten::_lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info)", "dispatch": "False", "default": "True"} +Tensor & lu_solve_out(const Tensor & self, const Tensor & LU_data, const Tensor & LU_pivots, Tensor & out); // {"schema": "aten::lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) 
+Tensor & multinomial_out(const Tensor & self, int64_t num_samples, bool replacement, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor multinomial(const Tensor & self, int64_t num_samples, bool replacement, c10::optional<Generator> generator); // {"schema": "aten::multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & lgamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::lgamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & lgamma_(Tensor & self); // {"schema": "aten::lgamma_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor lgamma(const Tensor & self); // {"schema": "aten::lgamma(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & digamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor digamma(const Tensor & self); // {"schema": "aten::digamma(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & polygamma_out(int64_t n, const Tensor & self, Tensor & out); // {"schema": "aten::polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor polygamma(int64_t n, const Tensor & self); // {"schema": "aten::polygamma(int n, Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & polygamma_(Tensor & self, int64_t n); // {"schema": "aten::polygamma_(Tensor(a!) self, int n) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor erfinv(const Tensor & self); // {"schema": "aten::erfinv(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & erfinv_(Tensor & self); // {"schema": "aten::erfinv_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & erfinv_out(const Tensor & self, Tensor & out); // {"schema": "aten::erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor i0(const Tensor & self); // {"schema": "aten::i0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & i0_(Tensor & self); // {"schema": "aten::i0_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sign(const Tensor & self); // {"schema": "aten::sign(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sign_(Tensor & self); // {"schema": "aten::sign_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & sign_out(const Tensor & self, Tensor & out); // {"schema": "aten::sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor signbit(const Tensor & self); // {"schema": "aten::signbit(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & signbit_out(const Tensor & self, Tensor & out); // {"schema": "aten::signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor dist(const Tensor & self, const Tensor & other, const Scalar & p); // {"schema": "aten::dist(Tensor self, Tensor other, Scalar p=2) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & atan2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & atan2_(Tensor & self, const Tensor & other); // {"schema": "aten::atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor atan2(const Tensor & self, const Tensor & other); // {"schema": "aten::atan2(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor arctan2(const Tensor & self, const Tensor & other); // {"schema": "aten::arctan2(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & arctan2_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & arctan2_(Tensor & self, const Tensor & other); // {"schema": "aten::arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & lerp_out(const Tensor & self, const Tensor & end, const Scalar & weight, Tensor & out); // {"schema": "aten::lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & lerp_out(const Tensor & self, const Tensor & end, const Tensor & weight, Tensor & out); // {"schema": "aten::lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor lerp(const Tensor & self, const Tensor & end, const Scalar & weight); // {"schema": "aten::lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor lerp(const Tensor & self, const Tensor & end, const Tensor & weight); // {"schema": "aten::lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & histc_out(const Tensor & self, int64_t bins, const Scalar & min, const Scalar & max, Tensor & out); // {"schema": "aten::histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor histc(const Tensor & self, int64_t bins, const Scalar & min, const Scalar & max); // {"schema": "aten::histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor &,Tensor &> histogram_out(const Tensor & self, const Tensor & bins, const c10::optional<Tensor> & weight, bool density, Tensor & hist, Tensor & bin_edges); // {"schema": "aten::histogram.bins_tensor_out(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> histogram(const Tensor & self, const Tensor & bins, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor &,Tensor &> histogram_out(const Tensor & self, int64_t bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density, Tensor & hist, Tensor & bin_edges); // {"schema": "aten::histogram.bin_ct_out(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> histogram(const Tensor & self, int64_t bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _histogramdd_bin_edges(const Tensor & self, IntArrayRef bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::_histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]", "dispatch": "True", "default": "False"}
+Tensor _histogramdd_from_bin_cts(const Tensor & self, IntArrayRef bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::_histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _histogramdd_from_bin_tensors(const Tensor & self, TensorList bins, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::_histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,::std::vector<Tensor>> histogramdd(const Tensor & self, IntArrayRef bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,::std::vector<Tensor>> histogramdd(const Tensor & self, int64_t bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,::std::vector<Tensor>> histogramdd(const Tensor & self, TensorList bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density); // {"schema": "aten::histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)", "dispatch": "False", "default": "True"}
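// [Editorial note] A hedged sketch of the histogram overloads above
// (bin count vs. explicit bin-edge tensor), assuming the matching at::
// wrappers; illustration only, not part of the generated header.
//
//   #include <ATen/ATen.h>
//
//   void histogram_example() {
//     at::Tensor x = at::randn({1000});
//     auto [hist, edges] = at::histogram(x, /*bins=*/20);     // bin_ct overload
//     at::Tensor my_edges = at::linspace(-3.0, 3.0, 21);
//     auto [hist2, edges2] = at::histogram(x, my_edges);      // bins_tensor overload
//   }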
+Tensor & fmod_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor fmod(const Tensor & self, const Scalar & other); // {"schema": "aten::fmod.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fmod_(Tensor & self, const Scalar & other); // {"schema": "aten::fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & fmod_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmod.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor fmod(const Tensor & self, const Tensor & other); // {"schema": "aten::fmod.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fmod_(Tensor & self, const Tensor & other); // {"schema": "aten::fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & hypot_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::hypot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor hypot(const Tensor & self, const Tensor & other); // {"schema": "aten::hypot(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & hypot_(Tensor & self, const Tensor & other); // {"schema": "aten::hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & igamma_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::igamma.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor igamma(const Tensor & self, const Tensor & other); // {"schema": "aten::igamma(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & igamma_(Tensor & self, const Tensor & other); // {"schema": "aten::igamma_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & igammac_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::igammac.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor igammac(const Tensor & self, const Tensor & other); // {"schema": "aten::igammac(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & igammac_(Tensor & self, const Tensor & other); // {"schema": "aten::igammac_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & nextafter_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::nextafter.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor nextafter(const Tensor & self, const Tensor & other); // {"schema": "aten::nextafter(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & nextafter_(Tensor & self, const Tensor & other); // {"schema": "aten::nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & remainder_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor remainder(const Tensor & self, const Scalar & other); // {"schema": "aten::remainder.Scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & remainder_(Tensor & self, const Scalar & other); // {"schema": "aten::remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & remainder_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::remainder.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor remainder(const Tensor & self, const Tensor & other); // {"schema": "aten::remainder.Tensor(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & remainder_(Tensor & self, const Tensor & other); // {"schema": "aten::remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor remainder(const Scalar & self, const Tensor & other); // {"schema": "aten::remainder.Scalar_Tensor(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor min(const Tensor & self); // {"schema": "aten::min(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & min_out(const Tensor & self, Tensor & out); // {"schema": "aten::min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor fmin(const Tensor & self, const Tensor & other); // {"schema": "aten::fmin(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fmin_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor max(const Tensor & self); // {"schema": "aten::max(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor fmax(const Tensor & self, const Tensor & other); // {"schema": "aten::fmax(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fmax_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::fmax.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor maximum(const Tensor & self, const Tensor & other); // {"schema": "aten::maximum(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & maximum_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::maximum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
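// [Editorial note] fmod and remainder above differ in sign convention: fmod
// takes the sign of the dividend (C-style), remainder the sign of the divisor
// (Python-style). A hedged sketch assuming the at:: wrappers; illustration
// only, not part of the generated header.
//
//   #include <ATen/ATen.h>
//
//   void mod_example() {
//     at::Tensor x = at::full({1}, -3.0);
//     at::Tensor f = at::fmod(x, 2.0);       // -1: sign follows the dividend
//     at::Tensor r = at::remainder(x, 2.0);  //  1: sign follows the divisor
//   }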
+Tensor max(const Tensor & self, const Tensor & other); // {"schema": "aten::max.other(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & max_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::max.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & max_out(const Tensor & self, Tensor & out); // {"schema": "aten::max.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor minimum(const Tensor & self, const Tensor & other); // {"schema": "aten::minimum(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & minimum_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::minimum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & min_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::min.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor min(const Tensor & self, const Tensor & other); // {"schema": "aten::min.other(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor quantile(const Tensor & self, const Tensor & q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & quantile_out(const Tensor & self, const Tensor & q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor quantile(const Tensor & self, double q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & quantile_out(const Tensor & self, double q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor nanquantile(const Tensor & self, const Tensor & q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & nanquantile_out(const Tensor & self, const Tensor & q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor nanquantile(const Tensor & self, double q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation); // {"schema": "aten::nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & nanquantile_out(const Tensor & self, double q, c10::optional<int64_t> dim, bool keepdim, c10::string_view interpolation, Tensor & out); // {"schema": "aten::nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
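// [Editorial note] A hedged sketch of the quantile/nanquantile scalar
// overloads above, assuming the at:: wrappers; illustration only.
//
//   #include <ATen/ATen.h>
//
//   void quantile_example() {
//     at::Tensor x = at::randn({100});
//     at::Tensor median = at::quantile(x, /*q=*/0.5);
//     // nanquantile ignores NaNs; with dim and keepdim it reduces per row:
//     at::Tensor m = at::randn({4, 8});
//     at::Tensor q90 = at::nanquantile(m, 0.9, /*dim=*/1, /*keepdim=*/true);
//   }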
+::std::tuple<Tensor &,Tensor &> sort_out(const Tensor & self, int64_t dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> sort_out(const Tensor & self, c10::optional<bool> stable, int64_t dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.values_stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> sort(const Tensor & self, int64_t dim, bool descending); // {"schema": "aten::sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor> sort(const Tensor & self, c10::optional<bool> stable, int64_t dim, bool descending); // {"schema": "aten::sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> sort_out(const Tensor & self, Dimname dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.dimname_values(Tensor self, Dimname dim, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> sort_out(const Tensor & self, c10::optional<bool> stable, Dimname dim, bool descending, Tensor & values, Tensor & indices); // {"schema": "aten::sort.dimname_values_stable(Tensor self, *, bool? stable, Dimname dim, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> sort(const Tensor & self, Dimname dim, bool descending); // {"schema": "aten::sort.dimname(Tensor self, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> sort(const Tensor & self, c10::optional<bool> stable, Dimname dim, bool descending); // {"schema": "aten::sort.dimname_stable(Tensor self, *, bool? stable, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices)", "dispatch": "False", "default": "True"}
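// [Editorial note] A hedged sketch of the sort overloads above; the
// values/indices pair comes back as a ::std::tuple. Assumes the at::
// wrappers; illustration only.
//
//   #include <ATen/ATen.h>
//
//   void sort_example() {
//     at::Tensor x = at::randn({5});
//     auto [values, indices] = at::sort(x, /*stable=*/true, /*dim=*/-1, /*descending=*/false);
//     // values[i] == x[indices[i]] for every i.
//   }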
+Tensor & msort_out(const Tensor & self, Tensor & out); // {"schema": "aten::msort.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor msort(const Tensor & self); // {"schema": "aten::msort(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor argsort(const Tensor & self, int64_t dim, bool descending); // {"schema": "aten::argsort(Tensor self, int dim=-1, bool descending=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor argsort(const Tensor & self, bool stable, int64_t dim, bool descending); // {"schema": "aten::argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor argsort(const Tensor & self, Dimname dim, bool descending); // {"schema": "aten::argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> topk_out(const Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted, Tensor & values, Tensor & indices); // {"schema": "aten::topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> topk(const Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted); // {"schema": "aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)", "dispatch": "True", "default": "True"}
+Tensor all(const Tensor & self); // {"schema": "aten::all(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & all_out(const Tensor & self, Tensor & out); // {"schema": "aten::all.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor any(const Tensor & self); // {"schema": "aten::any(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & any_out(const Tensor & self, Tensor & out); // {"schema": "aten::any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & renorm_out(const Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm, Tensor & out); // {"schema": "aten::renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor renorm(const Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm); // {"schema": "aten::renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & renorm_(Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm); // {"schema": "aten::renorm_(Tensor(a!) self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!)", "dispatch": "True", "default": "True"}
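// [Editorial note] topk above returns the k largest (or smallest) entries and
// their indices as a ::std::tuple. A hedged sketch assuming the at::
// wrappers; illustration only.
//
//   #include <ATen/ATen.h>
//
//   void topk_example() {
//     at::Tensor x = at::randn({10});
//     auto [values, indices] = at::topk(x, /*k=*/3);  // largest=true, sorted=true by default
//   }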
+Tensor unfold(const Tensor & self, int64_t dimension, int64_t size, int64_t step); // {"schema": "aten::unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a)", "dispatch": "True", "default": "False"}
+Tensor unfold_backward(const Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step); // {"schema": "aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor", "dispatch": "True", "default": "False"}
+bool equal(const Tensor & self, const Tensor & other); // {"schema": "aten::equal(Tensor self, Tensor other) -> bool", "dispatch": "True", "default": "False"}
+Tensor & pow_out(const Tensor & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor pow(const Tensor & self, const Tensor & exponent); // {"schema": "aten::pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & pow_out(const Scalar & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::pow.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor pow(const Scalar & self, const Tensor & exponent); // {"schema": "aten::pow.Scalar(Scalar self, Tensor exponent) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & pow_out(const Tensor & self, const Scalar & exponent, Tensor & out); // {"schema": "aten::pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor pow(const Tensor & self, const Scalar & exponent); // {"schema": "aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & pow_(Tensor & self, const Scalar & exponent); // {"schema": "aten::pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & pow_(Tensor & self, const Tensor & exponent); // {"schema": "aten::pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & float_power_out(const Tensor & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::float_power.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor float_power(const Tensor & self, const Tensor & exponent); // {"schema": "aten::float_power.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & float_power_out(const Scalar & self, const Tensor & exponent, Tensor & out); // {"schema": "aten::float_power.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor float_power(const Scalar & self, const Tensor & exponent); // {"schema": "aten::float_power.Scalar(Scalar self, Tensor exponent) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & float_power_out(const Tensor & self, const Scalar & exponent, Tensor & out); // {"schema": "aten::float_power.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor float_power(const Tensor & self, const Scalar & exponent); // {"schema": "aten::float_power.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & float_power_(Tensor & self, const Scalar & exponent); // {"schema": "aten::float_power_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & float_power_(Tensor & self, const Tensor & exponent); // {"schema": "aten::float_power_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)", "dispatch": "False", "default": "True"}
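// [Editorial note] pow vs. float_power, per the schemas above: float_power
// always computes in a floating (or complex) dtype, while pow keeps integer
// inputs integral. A hedged sketch assuming the at:: wrappers; illustration
// only.
//
//   #include <ATen/ATen.h>
//
//   void pow_example() {
//     at::Tensor n = at::arange(5);          // int64 tensor
//     at::Tensor a = at::pow(n, 2);          // stays integral
//     at::Tensor b = at::float_power(n, 2);  // promoted to double
//   }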
+Tensor & normal_(Tensor & self, double mean, double std, c10::optional<Generator> generator); // {"schema": "aten::normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor normal_functional(const Tensor & self, double mean, double std, c10::optional<Generator> generator); // {"schema": "aten::normal_functional(Tensor self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & normal_out(const Tensor & mean, double std, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor normal(const Tensor & mean, double std, c10::optional<Generator> generator); // {"schema": "aten::normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & normal_out(double mean, const Tensor & std, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor normal(double mean, const Tensor & std, c10::optional<Generator> generator); // {"schema": "aten::normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & normal_out(const Tensor & mean, const Tensor & std, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor normal(const Tensor & mean, const Tensor & std, c10::optional<Generator> generator); // {"schema": "aten::normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor normal(double mean, double std, c10::SymIntArrayRef size, c10::optional<Generator> generator, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::normal.float_float(float mean, float std, SymInt[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & normal_out(double mean, double std, c10::SymIntArrayRef size, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::normal.float_float_out(float mean, float std, SymInt[] size, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
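// [Editorial note] The block of _foreach_* declarations beginning just below
// applies one op across a whole list of tensors, often as a single fused
// kernel: out-of-place variants return ::std::vector<Tensor>, in-place
// variants (trailing underscore) mutate the list, and _foreach_addcdiv /
// _foreach_addcmul are the multi-tensor building blocks used by fused
// optimizers. A hedged sketch assuming the at:: wrappers; illustration only,
// not part of the generated header.
//
//   #include <ATen/ATen.h>
//   #include <vector>
//
//   void foreach_example() {
//     at::Tensor a = at::ones({2});
//     at::Tensor b = at::ones({3});
//     std::vector<at::Tensor> sums = at::_foreach_add({a, b}, /*scalar=*/1);
//     at::_foreach_mul_({a, b}, /*scalar=*/2);          // mutates a and b in place
//     at::Tensor p = at::zeros({2});
//     at::_foreach_addcmul_({p}, {a}, {a}, /*value=*/0.5);  // p += 0.5 * a * a
//   }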
+Tensor alias(const Tensor & self); // {"schema": "aten::alias(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+void _amp_foreach_non_finite_check_and_unscale_(TensorList self, Tensor & found_inf, const Tensor & inv_scale); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()", "dispatch": "True", "default": "False"}
+Tensor & _amp_update_scale_(Tensor & self, Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); // {"schema": "aten::_amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_add(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_add.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_add_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_add(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_add_(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_add(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_add_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_add(TensorList self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_add_(TensorList self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_sub(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_sub_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_sub(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_sub.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_sub_(TensorList self, TensorList other, const Scalar & alpha); // {"schema": "aten::_foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_sub(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_sub.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_sub_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_mul(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_mul.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_mul_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_mul(TensorList self, TensorList other); // {"schema": "aten::_foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_mul_(TensorList self, TensorList other); // {"schema": "aten::_foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_mul(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_mul_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_mul(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_mul_(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_div(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_div_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_div(TensorList self, TensorList other); // {"schema": "aten::_foreach_div.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_div_(TensorList self, TensorList other); // {"schema": "aten::_foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_div(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_div_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_div(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_div_(TensorList self, const Tensor & other); // {"schema": "aten::_foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_max(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_max_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_max(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_max.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_max_(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_max(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_clamp_max.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_max_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_min(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_min.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_min_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_min(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_min.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_min_(TensorList self, TensorList other); // {"schema": "aten::_foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_clamp_min(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_clamp_min.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_clamp_min_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_maximum(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_maximum_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_maximum(TensorList self, TensorList other); // {"schema": "aten::_foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_maximum_(TensorList self, TensorList other); // {"schema": "aten::_foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_maximum(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_maximum_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_minimum(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_minimum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_minimum_(TensorList self, const Scalar & scalar); // {"schema": "aten::_foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_minimum(TensorList self, TensorList other); // {"schema": "aten::_foreach_minimum.List(Tensor[] self, Tensor[] other) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_minimum_(TensorList self, TensorList other); // {"schema": "aten::_foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_minimum(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_minimum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_minimum_(TensorList self, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcdiv.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcdiv(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()", "dispatch": "True", "default": "False"}
+void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+void _foreach_addcdiv_(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcdiv_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcmul.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_addcmul(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value); // {"schema": "aten::_foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()", "dispatch": "True", "default": "False"}
+void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef<Scalar> scalars); // {"schema": "aten::_foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()", "dispatch": "True", "default": "False"}
+void _foreach_addcmul_(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars); // {"schema": "aten::_foreach_addcmul_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_abs(TensorList self); // {"schema": "aten::_foreach_abs(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_abs_(TensorList self); // {"schema": "aten::_foreach_abs_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_acos(TensorList self); // {"schema": "aten::_foreach_acos(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_acos_(TensorList self); // {"schema": "aten::_foreach_acos_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_asin(TensorList self); // {"schema": "aten::_foreach_asin(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_asin_(TensorList self); // {"schema": "aten::_foreach_asin_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_atan(TensorList self); // {"schema": "aten::_foreach_atan(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_atan_(TensorList self); // {"schema": "aten::_foreach_atan_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_ceil(TensorList self); // {"schema": "aten::_foreach_ceil(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_ceil_(TensorList self); // {"schema": "aten::_foreach_ceil_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_cos(TensorList self); // {"schema": "aten::_foreach_cos(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_cos_(TensorList self); // {"schema": "aten::_foreach_cos_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_cosh(TensorList self); // {"schema": "aten::_foreach_cosh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_cosh_(TensorList self); // {"schema": "aten::_foreach_cosh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_erf(TensorList self); // {"schema": "aten::_foreach_erf(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_erf_(TensorList self); // {"schema": "aten::_foreach_erf_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_erfc(TensorList self); // {"schema": "aten::_foreach_erfc(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_erfc_(TensorList self); // {"schema": "aten::_foreach_erfc_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
+::std::vector<Tensor> _foreach_exp(TensorList self); // {"schema": "aten::_foreach_exp(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"}
+void _foreach_exp_(TensorList self); // {"schema": "aten::_foreach_exp_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"}
self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_expm1(TensorList self); // {"schema": "aten::_foreach_expm1(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_expm1_(TensorList self); // {"schema": "aten::_foreach_expm1_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_floor(TensorList self); // {"schema": "aten::_foreach_floor(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_floor_(TensorList self); // {"schema": "aten::_foreach_floor_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_frac(TensorList self); // {"schema": "aten::_foreach_frac(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_frac_(TensorList self); // {"schema": "aten::_foreach_frac_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_lerp(TensorList self, TensorList tensors1, TensorList weights); // {"schema": "aten::_foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_lerp_(TensorList self, TensorList tensors1, TensorList weights); // {"schema": "aten::_foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_lerp(TensorList self, TensorList tensors1, const Scalar & weight); // {"schema": "aten::_foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_lerp_(TensorList self, TensorList tensors1, const Scalar & weight); // {"schema": "aten::_foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_lgamma(TensorList self); // {"schema": "aten::_foreach_lgamma(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_lgamma_(TensorList self); // {"schema": "aten::_foreach_lgamma_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_log(TensorList self); // {"schema": "aten::_foreach_log(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_log_(TensorList self); // {"schema": "aten::_foreach_log_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_log10(TensorList self); // {"schema": "aten::_foreach_log10(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_log10_(TensorList self); // {"schema": "aten::_foreach_log10_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_log1p(TensorList self); // {"schema": "aten::_foreach_log1p(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_log1p_(TensorList self); // {"schema": "aten::_foreach_log1p_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_log2(TensorList self); // {"schema": "aten::_foreach_log2(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_log2_(TensorList self); // {"schema": "aten::_foreach_log2_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_neg(TensorList self); // {"schema": "aten::_foreach_neg(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_neg_(TensorList self); // {"schema": 
"aten::_foreach_neg_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_norm(TensorList self, const Scalar & ord); // {"schema": "aten::_foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]", "dispatch": "True", "default": "False"} +::std::vector _foreach_pow(TensorList self, TensorList exponent); // {"schema": "aten::_foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]", "dispatch": "True", "default": "False"} +::std::vector _foreach_pow(TensorList self, const Scalar & exponent); // {"schema": "aten::_foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]", "dispatch": "True", "default": "False"} +::std::vector _foreach_pow(TensorList self, ArrayRef exponent); // {"schema": "aten::_foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]", "dispatch": "True", "default": "False"} +::std::vector _foreach_pow(const Scalar & self, TensorList exponent); // {"schema": "aten::_foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_pow_(TensorList self, TensorList exponent); // {"schema": "aten::_foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()", "dispatch": "True", "default": "False"} +void _foreach_pow_(TensorList self, const Scalar & exponent); // {"schema": "aten::_foreach_pow_.Scalar(Tensor(a!)[] self, Scalar exponent) -> ()", "dispatch": "True", "default": "False"} +void _foreach_pow_(TensorList self, ArrayRef exponent); // {"schema": "aten::_foreach_pow_.ScalarList(Tensor(a!)[] self, Scalar[] exponent) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_reciprocal(TensorList self); // {"schema": "aten::_foreach_reciprocal(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_reciprocal_(TensorList self); // {"schema": "aten::_foreach_reciprocal_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_round(TensorList self); // {"schema": "aten::_foreach_round(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_round_(TensorList self); // {"schema": "aten::_foreach_round_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_sigmoid(TensorList self); // {"schema": "aten::_foreach_sigmoid(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_sigmoid_(TensorList self); // {"schema": "aten::_foreach_sigmoid_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_sign(TensorList self); // {"schema": "aten::_foreach_sign(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_sign_(TensorList self); // {"schema": "aten::_foreach_sign_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_sin(TensorList self); // {"schema": "aten::_foreach_sin(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_sin_(TensorList self); // {"schema": "aten::_foreach_sin_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_sinh(TensorList self); // {"schema": "aten::_foreach_sinh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_sinh_(TensorList self); // {"schema": "aten::_foreach_sinh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_sqrt(TensorList self); // {"schema": "aten::_foreach_sqrt(Tensor[] self) -> Tensor[]", 
"dispatch": "True", "default": "False"} +void _foreach_sqrt_(TensorList self); // {"schema": "aten::_foreach_sqrt_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_tan(TensorList self); // {"schema": "aten::_foreach_tan(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_tan_(TensorList self); // {"schema": "aten::_foreach_tan_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_tanh(TensorList self); // {"schema": "aten::_foreach_tanh(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_tanh_(TensorList self); // {"schema": "aten::_foreach_tanh_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +::std::vector _foreach_trunc(TensorList self); // {"schema": "aten::_foreach_trunc(Tensor[] self) -> Tensor[]", "dispatch": "True", "default": "False"} +void _foreach_trunc_(TensorList self); // {"schema": "aten::_foreach_trunc_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +void _foreach_zero_(TensorList self); // {"schema": "aten::_foreach_zero_(Tensor(a!)[] self) -> ()", "dispatch": "True", "default": "False"} +void _foreach_copy_(TensorList self, TensorList src, bool non_blocking); // {"schema": "aten::_foreach_copy_(Tensor(a!)[] self, Tensor[] src, bool non_blocking=False) -> ()", "dispatch": "True", "default": "False"} +Tensor bucketize(const Tensor & self, const Tensor & boundaries, bool out_int32, bool right); // {"schema": "aten::bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & bucketize_out(const Tensor & self, const Tensor & boundaries, bool out_int32, bool right, Tensor & out); // {"schema": "aten::bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor bucketize(const Scalar & self, const Tensor & boundaries, bool out_int32, bool right); // {"schema": "aten::bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor", "dispatch": "True", "default": "False"} +Tensor searchsorted(const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter); // {"schema": "aten::searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & searchsorted_out(const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter, Tensor & out); // {"schema": "aten::searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor searchsorted(const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter); // {"schema": "aten::searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? 
sorter=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & searchsorted_out(const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter, Tensor & out); // {"schema": "aten::searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_indices_from_coo_to_csr(const Tensor & self, int64_t size, bool out_int32); // {"schema": "aten::_convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _convert_indices_from_coo_to_csr_out(const Tensor & self, int64_t size, bool out_int32, Tensor & out); // {"schema": "aten::_convert_indices_from_coo_to_csr.out(Tensor self, int size, *, bool out_int32=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _convert_indices_from_csr_to_coo(const Tensor & crow_indices, const Tensor & col_indices, bool out_int32, bool transpose); // {"schema": "aten::_convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _convert_indices_from_csr_to_coo_out(const Tensor & crow_indices, const Tensor & col_indices, bool out_int32, bool transpose, Tensor & out); // {"schema": "aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & mse_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mse_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & mse_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, Tensor & grad_input); // {"schema": "aten::mse_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mse_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor", "dispatch": "True", "default": "False"} +Tensor l1_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & multi_margin_loss_out(const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction, Tensor & out); // {"schema": "aten::multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multi_margin_loss(const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction); // {"schema": "aten::multi_margin_loss(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & multi_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction, Tensor & grad_input); // {"schema": "aten::multi_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Scalar p, Scalar margin, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multi_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction); // {"schema": "aten::multi_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, Scalar p, Scalar margin, Tensor? weight=None, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & multilabel_margin_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::multilabel_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor multilabel_margin_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::multilabel_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple multilabel_margin_loss_forward_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & output, Tensor & is_target); // {"schema": "aten::multilabel_margin_loss_forward.output(Tensor self, Tensor target, int reduction, *, Tensor(a!) output, Tensor(b!) is_target) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple multilabel_margin_loss_forward(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)", "dispatch": "True", "default": "False"} +Tensor & multilabel_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target, Tensor & grad_input); // {"schema": "aten::multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor multilabel_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target); // {"schema": "aten::multilabel_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & nll_loss_out(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & out); // {"schema": "aten::nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nll_loss_nd(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss_nd(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +Tensor nll_loss(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple nll_loss_forward_out(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & output, Tensor & total_weight); // {"schema": "aten::nll_loss_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, *, Tensor(a!) output, Tensor(b!) total_weight) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple nll_loss_forward(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)", "dispatch": "True", "default": "True"} +Tensor & nll_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight, Tensor & grad_input); // {"schema": "aten::nll_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nll_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight); // {"schema": "aten::nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & nll_loss2d_out(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & out); // {"schema": "aten::nll_loss2d.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor nll_loss2d(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss2d(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, SymInt ignore_index=-100) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple nll_loss2d_forward_out(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, Tensor & output, Tensor & total_weight); // {"schema": "aten::nll_loss2d_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, *, Tensor(a!) output, Tensor(b!) 
total_weight) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple nll_loss2d_forward(const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index); // {"schema": "aten::nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)", "dispatch": "True", "default": "False"} +Tensor & nll_loss2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight, Tensor & grad_input); // {"schema": "aten::nll_loss2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor nll_loss2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, c10::SymInt ignore_index, const Tensor & total_weight); // {"schema": "aten::nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & smooth_l1_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, double beta, Tensor & out); // {"schema": "aten::smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor smooth_l1_loss(const Tensor & self, const Tensor & target, int64_t reduction, double beta); // {"schema": "aten::smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & smooth_l1_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double beta, Tensor & grad_input); // {"schema": "aten::smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor smooth_l1_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double beta); // {"schema": "aten::smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & huber_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, double delta, Tensor & out); // {"schema": "aten::huber_loss.out(Tensor self, Tensor target, int reduction=Mean, float delta=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor huber_loss(const Tensor & self, const Tensor & target, int64_t reduction, double delta); // {"schema": "aten::huber_loss(Tensor self, Tensor target, int reduction=Mean, float delta=1.0) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & huber_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double delta, Tensor & grad_input); // {"schema": "aten::huber_loss_backward.out(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor huber_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double delta); // {"schema": "aten::huber_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & soft_margin_loss_out(const Tensor & self, const Tensor & target, int64_t reduction, Tensor & out); // {"schema": "aten::soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor soft_margin_loss(const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::soft_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & soft_margin_loss_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, Tensor & grad_input); // {"schema": "aten::soft_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor soft_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction); // {"schema": "aten::soft_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_out(const Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, Tensor & out); // {"schema": "aten::elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor elu(const Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale); // {"schema": "aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_backward_out(const Tensor & grad_output, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, bool is_result, const Tensor & self_or_result, Tensor & grad_input); // {"schema": "aten::elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor elu_backward(const Tensor & grad_output, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, bool is_result, const Tensor & self_or_result); // {"schema": "aten::elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & elu_(Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale); // {"schema": "aten::elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::glu.out(Tensor self, int dim=-1, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor glu(const Tensor & self, int64_t dim); // {"schema": "aten::glu(Tensor self, int dim=-1) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & glu_backward_out(const Tensor & grad_output, const Tensor & self, int64_t dim, Tensor & grad_input); // {"schema": "aten::glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor glu_backward(const Tensor & grad_output, const Tensor & self, int64_t dim); // {"schema": "aten::glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor glu_jvp(const Tensor & glu, const Tensor & x, const Tensor & dx, int64_t dim); // {"schema": "aten::glu_jvp(Tensor glu, Tensor x, Tensor dx, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor glu_backward_jvp(const Tensor & grad_x, const Tensor & grad_glu, const Tensor & x, const Tensor & dgrad_glu, const Tensor & dx, int64_t dim); // {"schema": "aten::glu_backward_jvp(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardsigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::hardsigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardsigmoid(const Tensor & self); // {"schema": "aten::hardsigmoid(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardsigmoid_(Tensor & self); // {"schema": "aten::hardsigmoid_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hardsigmoid_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::hardsigmoid_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardsigmoid_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & hardtanh_out(const Tensor & self, const Scalar & min_val, const Scalar & max_val, Tensor & out); // {"schema": "aten::hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardtanh(const Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardtanh_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & min_val, const Scalar & max_val, Tensor & grad_input); // {"schema": "aten::hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardtanh_backward(const Tensor & grad_output, const Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardtanh_(Tensor & self, const Scalar & min_val, const Scalar & max_val); // {"schema": "aten::hardtanh_(Tensor(a!) 
self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & hardswish_out(const Tensor & self, Tensor & out); // {"schema": "aten::hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardswish(const Tensor & self); // {"schema": "aten::hardswish(Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & hardswish_(Tensor & self); // {"schema": "aten::hardswish_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor hardswish_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::hardswish_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & leaky_relu_out(const Tensor & self, const Scalar & negative_slope, Tensor & out); // {"schema": "aten::leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor leaky_relu(const Tensor & self, const Scalar & negative_slope); // {"schema": "aten::leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & leaky_relu_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & negative_slope, bool self_is_result, Tensor & grad_input); // {"schema": "aten::leaky_relu_backward.grad_input(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor leaky_relu_backward(const Tensor & grad_output, const Tensor & self, const Scalar & negative_slope, bool self_is_result); // {"schema": "aten::leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & leaky_relu_(Tensor & self, const Scalar & negative_slope); // {"schema": "aten::leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & log_sigmoid_out(const Tensor & self, Tensor & out); // {"schema": "aten::log_sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor log_sigmoid(const Tensor & self); // {"schema": "aten::log_sigmoid(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +::std::tuple log_sigmoid_forward_out(const Tensor & self, Tensor & output, Tensor & buffer); // {"schema": "aten::log_sigmoid_forward.output(Tensor self, *, Tensor(a!) output, Tensor(b!) buffer) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple log_sigmoid_forward(const Tensor & self); // {"schema": "aten::log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)", "dispatch": "True", "default": "False"} +Tensor & log_sigmoid_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & buffer, Tensor & grad_input); // {"schema": "aten::log_sigmoid_backward.grad_input(Tensor grad_output, Tensor self, Tensor buffer, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer); // {"schema": "aten::log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & rrelu_with_noise_out(const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator, Tensor & out); // {"schema": "aten::rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor rrelu_with_noise(const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator); // {"schema": "aten::rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "False"} +Tensor rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, bool self_is_result); // {"schema": "aten::rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & rrelu_with_noise_(Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator); // {"schema": "aten::rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & softplus_out(const Tensor & self, const Scalar & beta, const Scalar & threshold, Tensor & out); // {"schema": "aten::softplus.out(Tensor self, Scalar beta=1, Scalar threshold=20, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softplus(const Tensor & self, const Scalar & beta, const Scalar & threshold); // {"schema": "aten::softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softplus_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & beta, const Scalar & threshold, Tensor & grad_input); // {"schema": "aten::softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softplus_backward(const Tensor & grad_output, const Tensor & self, const Scalar & beta, const Scalar & threshold); // {"schema": "aten::softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softshrink_out(const Tensor & self, const Scalar & lambd, Tensor & out); // {"schema": "aten::softshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softshrink(const Tensor & self, const Scalar & lambd); // {"schema": "aten::softshrink(Tensor self, Scalar lambd=0.5) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & softshrink_backward_out(const Tensor & grad_output, const Tensor & self, const Scalar & lambd, Tensor & grad_input); // {"schema": "aten::softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor softshrink_backward(const Tensor & grad_output, const Tensor & self, const Scalar & lambd); // {"schema": "aten::softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & adaptive_avg_pool2d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_avg_pool2d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor", "dispatch": "False", "default": "True"} +Tensor mkldnn_adaptive_avg_pool2d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & mkldnn_adaptive_avg_pool2d_out(const Tensor & self, IntArrayRef output_size, Tensor & out); // {"schema": "aten::mkldnn_adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor mkldnn_adaptive_avg_pool2d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::mkldnn_adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool2d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::_adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool2d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::_adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & adaptive_avg_pool3d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_avg_pool3d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _adaptive_avg_pool3d(const Tensor & self, c10::SymIntArrayRef output_size); // {"schema": "aten::_adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & adaptive_avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & grad_input); // {"schema": "aten::adaptive_avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) 
grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _adaptive_avg_pool3d_backward(const Tensor & grad_output, const Tensor & self); // {"schema": "aten::_adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool2d_out(const Tensor & self, IntArrayRef output_size, Tensor & out, Tensor & indices); // {"schema": "aten::adaptive_max_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool2d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & adaptive_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::adaptive_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_max_pool2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & indices); // {"schema": "aten::adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple adaptive_max_pool3d_out(const Tensor & self, IntArrayRef output_size, Tensor & out, Tensor & indices); // {"schema": "aten::adaptive_max_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple adaptive_max_pool3d(const Tensor & self, IntArrayRef output_size); // {"schema": "aten::adaptive_max_pool3d(Tensor self, int[3] output_size) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & adaptive_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::adaptive_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor adaptive_max_pool3d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & indices); // {"schema": "aten::adaptive_max_pool3d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override, Tensor & out); // {"schema": "aten::avg_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override); // {"schema": "aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? 
divisor_override=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override, Tensor & grad_input); // {"schema": "aten::avg_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override); // {"schema": "aten::avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override, Tensor & out); // {"schema": "aten::avg_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override); // {"schema": "aten::avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override, Tensor & grad_input); // {"schema": "aten::avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor avg_pool3d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override); // {"schema": "aten::avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple fractional_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples, Tensor & output, Tensor & indices); // {"schema": "aten::fractional_max_pool2d.output(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple fractional_max_pool2d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples); // {"schema": "aten::fractional_max_pool2d(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & fractional_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fractional_max_pool2d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices); // {"schema": "aten::fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple fractional_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples, Tensor & output, Tensor & indices); // {"schema": "aten::fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple fractional_max_pool3d(const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples); // {"schema": "aten::fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & fractional_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor fractional_max_pool3d_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices); // {"schema": "aten::fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple max_pool2d_with_indices_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out, Tensor & indices); // {"schema": "aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple max_pool2d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "True"} +Tensor & max_pool2d_with_indices_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_pool2d_with_indices_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices); // {"schema": "aten::max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple max_pool3d_with_indices_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out, Tensor & indices); // {"schema": "aten::max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "False"} +::std::tuple max_pool3d_with_indices(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode); // {"schema": "aten::max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"} +Tensor & max_pool3d_with_indices_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices, Tensor & grad_input); // {"schema": "aten::max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_pool3d_with_indices_backward(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices); // {"schema": "aten::max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & max_unpool2d_out(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) 
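For the *_with_indices pooling ops the schemas return a (values, indices) pair; the indices are what the matching *_backward.grad_input kernel consumes. Sketch (illustrative, assuming a recent libtorch):

#include <ATen/ATen.h>
#include <tuple>

int main() {
  at::Tensor x = at::randn({1, 1, 4, 4});  // NCHW
  // Returns the pooled values and the argmax indices used by the backward kernel.
  auto [values, indices] = at::max_pool2d_with_indices(
      x, /*kernel_size=*/{2, 2}, /*stride=*/{2, 2},
      /*padding=*/{0, 0}, /*dilation=*/{1, 1}, /*ceil_mode=*/false);
  // Adaptive pooling takes only the target output size.
  at::Tensor y = at::adaptive_avg_pool2d(x, {2, 2});
}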
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_unpool2d(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size); // {"schema": "aten::max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & max_unpool3d_out(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, IntArrayRef stride, IntArrayRef padding, Tensor & out); // {"schema": "aten::max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor max_unpool3d(const Tensor & self, const Tensor & indices, c10::SymIntArrayRef output_size, IntArrayRef stride, IntArrayRef padding); // {"schema": "aten::max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad1d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad1d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad1d(Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad1d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad1d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad2d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad2d.out(Tensor self, SymInt[4] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad2d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad2d(Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad2d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad2d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & reflection_pad3d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::reflection_pad3d.out(Tensor self, SymInt[6] padding, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad3d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad3d(Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & reflection_pad3d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::reflection_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor reflection_pad3d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::reflection_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad1d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad1d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad1d(Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad1d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad1d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad2d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad2d.out(Tensor self, SymInt[4] padding, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad2d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad2d(Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad2d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad2d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & replication_pad3d_out(const Tensor & self, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::replication_pad3d.out(Tensor self, SymInt[6] padding, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad3d(const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad3d(Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & replication_pad3d_backward_out(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding, Tensor & grad_input); // {"schema": "aten::replication_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor replication_pad3d_backward(const Tensor & grad_output, const Tensor & self, c10::SymIntArrayRef padding); // {"schema": "aten::replication_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor _pad_circular(const Tensor & self, c10::SymIntArrayRef pad); // {"schema": "aten::_pad_circular(Tensor self, SymInt[] pad) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _pad_enum(const Tensor & self, c10::SymIntArrayRef pad, int64_t mode, c10::optional value); // {"schema": "aten::_pad_enum(Tensor self, SymInt[] pad, int mode, float? value=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor pad(const Tensor & self, c10::SymIntArrayRef pad, c10::string_view mode, c10::optional value); // {"schema": "aten::pad(Tensor self, SymInt[] pad, str mode=\"constant\", float? value=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_linear1d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::upsample_linear1d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_bilinear2d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::upsample_bilinear2d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_bilinear2d_aa(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::_upsample_bilinear2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_trilinear3d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::upsample_trilinear3d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_bicubic2d(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::upsample_bicubic2d.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_bicubic2d_aa(const Tensor & input, OptionalSymIntArrayRef output_size, bool align_corners, c10::optional> scale_factors); // {"schema": "aten::_upsample_bicubic2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? 
scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest1d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::upsample_nearest1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact1d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest2d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::upsample_nearest2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact2d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor upsample_nearest3d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::upsample_nearest3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _upsample_nearest_exact3d(const Tensor & input, OptionalSymIntArrayRef output_size, c10::optional> scale_factors); // {"schema": "aten::_upsample_nearest_exact3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & upsample_linear1d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales, Tensor & out); // {"schema": "aten::upsample_linear1d.out(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_linear1d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales); // {"schema": "aten::upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_linear1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales, Tensor & grad_input); // {"schema": "aten::upsample_linear1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_linear1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales); // {"schema": "aten::upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bilinear2d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & out); // {"schema": "aten::upsample_bilinear2d.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bilinear2d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_bilinear2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bilinear2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_bilinear2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bilinear2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_bilinear2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bilinear2d_aa_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bilinear2d_aa(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::_upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bilinear2d_aa_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bilinear2d_aa_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::_upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bicubic2d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & out); // {"schema": "aten::upsample_bicubic2d.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bicubic2d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_bicubic2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_bicubic2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_bicubic2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bicubic2d_aa_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & out); // {"schema": "aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bicubic2d_aa(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::_upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _upsample_bicubic2d_aa_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _upsample_bicubic2d_aa_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::_upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_trilinear3d_out(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w, Tensor & out); // {"schema": "aten::upsample_trilinear3d.out(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_trilinear3d(const Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_trilinear3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w, Tensor & grad_input); // {"schema": "aten::upsample_trilinear3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_trilinear3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w); // {"schema": "aten::upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest1d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional scales, Tensor & out); // {"schema": "aten::upsample_nearest1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact1d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional scales, Tensor & out); // {"schema": "aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor upsample_nearest1d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional scales); // {"schema": "aten::upsample_nearest1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor _upsample_nearest_exact1d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional scales); // {"schema": "aten::_upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & upsample_nearest1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional scales, Tensor & grad_input); // {"schema": "aten::upsample_nearest1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & _upsample_nearest_exact1d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional scales, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) 
+Tensor upsample_nearest1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales); // {"schema": "aten::upsample_nearest1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _upsample_nearest_exact1d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales); // {"schema": "aten::_upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & upsample_nearest2d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & out); // {"schema": "aten::upsample_nearest2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & _upsample_nearest_exact2d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & out); // {"schema": "aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor upsample_nearest2d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::upsample_nearest2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _upsample_nearest_exact2d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & upsample_nearest2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & grad_input); // {"schema": "aten::upsample_nearest2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & _upsample_nearest_exact2d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor upsample_nearest2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::upsample_nearest2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _upsample_nearest_exact2d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & upsample_nearest3d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & out); // {"schema": "aten::upsample_nearest3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & _upsample_nearest_exact3d_out(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & out); // {"schema": "aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor upsample_nearest3d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _upsample_nearest_exact3d(const Tensor & self, c10::SymIntArrayRef output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::_upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & upsample_nearest3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & grad_input); // {"schema": "aten::upsample_nearest3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & _upsample_nearest_exact3d_backward_out(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, Tensor & grad_input); // {"schema": "aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor upsample_nearest3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _upsample_nearest_exact3d_backward(const Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w); // {"schema": "aten::_upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & sigmoid_backward_out(const Tensor & grad_output, const Tensor & output, Tensor & grad_input); // {"schema": "aten::sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor sigmoid_backward(const Tensor & grad_output, const Tensor & output); // {"schema": "aten::sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & logit_backward_out(const Tensor & grad_output, const Tensor & self, c10::optional<double> eps, Tensor & grad_input); // {"schema": "aten::logit_backward.grad_input(Tensor grad_output, Tensor self, float? eps=None, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor logit_backward(const Tensor & grad_output, const Tensor & self, c10::optional<double> eps); // {"schema": "aten::logit_backward(Tensor grad_output, Tensor self, float? eps=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & tanh_backward_out(const Tensor & grad_output, const Tensor & output, Tensor & grad_input); // {"schema": "aten::tanh_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor tanh_backward(const Tensor & grad_output, const Tensor & output); // {"schema": "aten::tanh_backward(Tensor grad_output, Tensor output) -> Tensor", "dispatch": "True", "default": "True"}
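Note that sigmoid_backward and tanh_backward take the saved forward *output* rather than the input, because both derivatives are cheap functions of the output: d tanh(x)/dx = 1 - tanh(x)^2 and d sigmoid(x)/dx = s(x)(1 - s(x)). A sketch of the equivalent arithmetic:

// Sketch: what sigmoid_backward/tanh_backward compute, written out by hand.
#include <ATen/ATen.h>

at::Tensor manual_tanh_backward(const at::Tensor& grad_out,
                                const at::Tensor& out) {
  return grad_out * (1 - out * out); // matches at::tanh_backward(grad_out, out)
}

at::Tensor manual_sigmoid_backward(const at::Tensor& grad_out,
                                   const at::Tensor& out) {
  return grad_out * out * (1 - out); // matches at::sigmoid_backward(grad_out, out)
}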
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor slow_conv_transpose3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & thnn_conv2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor thnn_conv2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & _slow_conv2d_forward_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & output); // {"schema": "aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _slow_conv2d_forward(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::_slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor", "dispatch": "True", "default": "False"} +::std::tuple _slow_conv2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & grad_input, Tensor & grad_weight, Tensor & grad_bias); // {"schema": "aten::_slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "False"} +::std::tuple _slow_conv2d_backward(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array output_mask); // {"schema": "aten::_slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)", "dispatch": "True", "default": "False"} +const Tensor & _conv_depthwise2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, const Tensor & out); // {"schema": "aten::_conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _conv_depthwise2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::_conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor", "dispatch": "True", "default": "False"} +Tensor conv_depthwise3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & slow_conv3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & out); // {"schema": "aten::slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor slow_conv3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & slow_conv3d_forward_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, Tensor & output); // {"schema": "aten::slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor slow_conv3d_forward(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); // {"schema": "aten::slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? 
bias, SymInt[3] stride, SymInt[3] padding) -> Tensor", "dispatch": "True", "default": "False"} +Tensor slow_conv_dilated2d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor slow_conv_dilated3d(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation); // {"schema": "aten::slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor", "dispatch": "True", "default": "False"} +Tensor & col2im_out(const Tensor & self, c10::SymIntArrayRef output_size, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride, Tensor & out); // {"schema": "aten::col2im.out(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor col2im(const Tensor & self, c10::SymIntArrayRef output_size, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride); // {"schema": "aten::col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor", "dispatch": "True", "default": "False"} +Tensor column_stack(TensorList tensors); // {"schema": "aten::column_stack(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & column_stack_out(TensorList tensors, Tensor & out); // {"schema": "aten::column_stack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & im2col_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride, Tensor & out); // {"schema": "aten::im2col.out(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor im2col(const Tensor & self, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride); // {"schema": "aten::im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor", "dispatch": "True", "default": "False"} +Tensor isfinite(const Tensor & self); // {"schema": "aten::isfinite(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor isinf(const Tensor & self); // {"schema": "aten::isinf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +void record_stream(Tensor & self, Stream s); // {"schema": "aten::record_stream(Tensor(a!) self, Stream s) -> ()", "dispatch": "True", "default": "False"} +Tensor isposinf(const Tensor & self); // {"schema": "aten::isposinf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isposinf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isposinf.out(Tensor self, *, Tensor(a!) 
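im2col unfolds sliding blocks of an image into columns so that a convolution becomes a plain matrix multiply; col2im is the adjoint that sums overlapping patches back, which is why both appear here next to the slow conv kernels. A sketch (stride 1, no padding or dilation, shapes assumed for illustration):

// Sketch: convolution as im2col + matmul. Note the argument order above:
// kernel_size, dilation, padding, stride.
#include <ATen/ATen.h>

at::Tensor conv2d_via_im2col(const at::Tensor& x, const at::Tensor& w) {
  // x: (N, C, H, W); w: (O, C, k, k).
  const int64_t k = w.size(2), O = w.size(0);
  const int64_t Ho = x.size(2) - k + 1, Wo = x.size(3) - k + 1;
  auto cols = at::im2col(x, {k, k}, {1, 1}, {0, 0}, {1, 1}); // (N, C*k*k, Ho*Wo)
  auto out = w.reshape({O, -1}).matmul(cols);                // (N, O, Ho*Wo)
  return out.reshape({x.size(0), O, Ho, Wo});
}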
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor isneginf(const Tensor & self); // {"schema": "aten::isneginf(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isneginf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _add_batch_dim(const Tensor & self, int64_t batch_dim, int64_t level); // {"schema": "aten::_add_batch_dim(Tensor self, int batch_dim, int level) -> Tensor", "dispatch": "False", "default": "True"} +Tensor _remove_batch_dim(const Tensor & self, int64_t level, int64_t batch_size, int64_t out_dim); // {"schema": "aten::_remove_batch_dim(Tensor self, int level, int batch_size, int out_dim) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_entr(const Tensor & self); // {"schema": "aten::special_entr(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_entr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_entr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_ndtri(const Tensor & self); // {"schema": "aten::special_ndtri(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_ndtri_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_ndtri.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_log_ndtr(const Tensor & self); // {"schema": "aten::special_log_ndtr(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_log_ndtr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_log_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_expm1(const Tensor & self); // {"schema": "aten::special_expm1(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_expm1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_exp2(const Tensor & self); // {"schema": "aten::special_exp2(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_exp2_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_psi(const Tensor & self); // {"schema": "aten::special_psi(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_psi_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_psi.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_digamma(const Tensor & self); // {"schema": "aten::special_digamma(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_digamma_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammaln(const Tensor & self); // {"schema": "aten::special_gammaln(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammaln_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_gammaln.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erf(const Tensor & self); // {"schema": "aten::special_erf(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erf_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erfc(const Tensor & self); // {"schema": "aten::special_erfc(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erfc_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_erfcx(const Tensor & self); // {"schema": "aten::special_erfcx(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_erfcx_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfcx.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_erfinv(const Tensor & self); // {"schema": "aten::special_erfinv(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_erfinv_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_ndtr(const Tensor & self); // {"schema": "aten::special_ndtr(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_ndtr_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_xlog1py(const Tensor & self, const Tensor & other); // {"schema": "aten::special_xlog1py(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_xlog1py(const Scalar & self, const Tensor & other); // {"schema": "aten::special_xlog1py.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_xlog1py(const Tensor & self, const Scalar & other); // {"schema": "aten::special_xlog1py.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_xlog1py_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlog1py.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_xlog1py_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_xlog1py_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_xlog1py.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_xlogy(const Tensor & self, const Tensor & other); // {"schema": "aten::special_xlogy(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_xlogy(const Scalar & self, const Tensor & other); // {"schema": "aten::special_xlogy.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_xlogy(const Tensor & self, const Scalar & other); // {"schema": "aten::special_xlogy.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlogy.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_xlogy.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor & special_xlogy_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_xlogy.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_zeta(const Tensor & self, const Tensor & other); // {"schema": "aten::special_zeta(Tensor self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_zeta(const Scalar & self, const Tensor & other); // {"schema": "aten::special_zeta.self_scalar(Scalar self, Tensor other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_zeta(const Tensor & self, const Scalar & other); // {"schema": "aten::special_zeta.other_scalar(Tensor self, Scalar other) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_zeta_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_zeta.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_zeta_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_zeta_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::special_zeta.other_scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_i0(const Tensor & self); // {"schema": "aten::special_i0(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_i0e(const Tensor & self); // {"schema": "aten::special_i0e(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i0e_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i0e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_i1(const Tensor & self); // {"schema": "aten::special_i1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i1.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_i1e(const Tensor & self); // {"schema": "aten::special_i1e(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_i1e_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_i1e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_logit(const Tensor & self, c10::optional eps); // {"schema": "aten::special_logit(Tensor self, float? eps=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_logit_out(const Tensor & self, c10::optional eps, Tensor & out); // {"schema": "aten::special_logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_polygamma(int64_t n, const Tensor & self); // {"schema": "aten::special_polygamma(int n, Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_polygamma_out(int64_t n, const Tensor & self, Tensor & out); // {"schema": "aten::special_polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_logsumexp(const Tensor & self, IntArrayRef dim, bool keepdim); // {"schema": "aten::special_logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_logsumexp_out(const Tensor & self, IntArrayRef dim, bool keepdim, Tensor & out); // {"schema": "aten::special_logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_expit(const Tensor & self); // {"schema": "aten::special_expit(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_expit_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_expit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_sinc(const Tensor & self); // {"schema": "aten::special_sinc(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_sinc_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_sinc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_round(const Tensor & self, int64_t decimals); // {"schema": "aten::special_round(Tensor self, *, int decimals=0) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_round_out(const Tensor & self, int64_t decimals, Tensor & out); // {"schema": "aten::special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_log1p(const Tensor & self); // {"schema": "aten::special_log1p(Tensor self) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_log1p_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_log_softmax(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::special_log_softmax(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammainc_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_gammainc.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammainc(const Tensor & self, const Tensor & other); // {"schema": "aten::special_gammainc(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_gammaincc_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::special_gammaincc.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_gammaincc(const Tensor & self, const Tensor & other); // {"schema": "aten::special_gammaincc(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"} +Tensor special_multigammaln(const Tensor & self, int64_t p); // {"schema": "aten::special_multigammaln(Tensor self, int p) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & special_multigammaln_out(const Tensor & self, int64_t p, Tensor & out); // {"schema": "aten::special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor special_softmax(const Tensor & self, int64_t dim, c10::optional dtype); // {"schema": "aten::special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor fft_fft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_fft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ifft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_ifft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ifft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_ifft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_rfft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_rfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_rfft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_rfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_irfft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_irfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_irfft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_irfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_hfft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? 
norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_hfft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ihfft(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm); // {"schema": "aten::fft_ihfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ihfft_out(const Tensor & self, c10::optional n, int64_t dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_ihfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_fft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm); // {"schema": "aten::fft_fft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_fft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_fft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_ifft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm); // {"schema": "aten::fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_ifft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_rfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm); // {"schema": "aten::fft_rfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_rfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_rfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_irfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm); // {"schema": "aten::fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +Tensor & fft_irfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm, Tensor & out); // {"schema": "aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"} +Tensor fft_hfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm); // {"schema": "aten::fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"} +const Tensor & fft_hfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional norm, const Tensor & out); // {"schema": "aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? 
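A sketch of the 1-D transforms just declared: fft_ifft inverts fft_fft, and fft_rfft keeps only the non-negative frequencies of a real signal, so fft_irfft accepts an optional n to disambiguate odd-length inputs (without it, 2*(m-1) is assumed). Shapes here are illustrative:

// Sketch: 1-D FFT roundtrips through the at:: wrappers.
#include <ATen/ATen.h>

void fft_roundtrip() {
  auto x = at::randn({9});            // odd length on purpose
  auto X = at::fft_fft(x);            // complex spectrum, length 9
  auto y = at::fft_ifft(X);           // == x up to rounding (complex dtype)
  auto R = at::fft_rfft(x);           // length 9/2 + 1 = 5
  auto z = at::fft_irfft(R, /*n=*/9); // without n, length 8 would come back
}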
+Tensor fft_fft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_fft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_fft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_fft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_ifft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_ifft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_rfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_rfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_rfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_rfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_irfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_irfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_hfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+const Tensor & fft_hfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, const Tensor & out); // {"schema": "aten::fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_ihfft2(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_ihfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+const Tensor & fft_ihfft2_out(const Tensor & self, OptionalSymIntArrayRef s, IntArrayRef dim, c10::optional<c10::string_view> norm, const Tensor & out); // {"schema": "aten::fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_fftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_fftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_fftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_fftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_ifftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_ifftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_ifftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_ifftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_rfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_rfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_rfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_rfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_irfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_irfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & fft_irfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, Tensor & out); // {"schema": "aten::fft_irfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_hfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+const Tensor & fft_hfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const Tensor & out); // {"schema": "aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_ihfftn(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); // {"schema": "aten::fft_ihfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor", "dispatch": "False", "default": "True"}
+const Tensor & fft_ihfftn_out(const Tensor & self, OptionalSymIntArrayRef s, OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const Tensor & out); // {"schema": "aten::fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor fft_fftfreq(int64_t n, double d, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fft_fftfreq_out(int64_t n, double d, Tensor & out); // {"schema": "aten::fft_fftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor fft_rfftfreq(int64_t n, double d, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::fft_rfftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & fft_rfftfreq_out(int64_t n, double d, Tensor & out); // {"schema": "aten::fft_rfftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor fft_fftshift(const Tensor & self, OptionalIntArrayRef dim); // {"schema": "aten::fft_fftshift(Tensor self, int[1]? dim=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor fft_ifftshift(const Tensor & self, OptionalIntArrayRef dim); // {"schema": "aten::fft_ifftshift(Tensor self, int[1]? dim=None) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> linalg_cholesky_ex(const Tensor & self, bool upper, bool check_errors); // {"schema": "aten::linalg_cholesky_ex(Tensor self, *, bool upper=False, bool check_errors=False) -> (Tensor L, Tensor info)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_cholesky_ex_out(const Tensor & self, bool upper, bool check_errors, Tensor & L, Tensor & info); // {"schema": "aten::linalg_cholesky_ex.L(Tensor self, *, bool upper=False, bool check_errors=False, Tensor(a!) L, Tensor(b!) info) -> (Tensor(a!) L, Tensor(b!) info)", "dispatch": "True", "default": "False"}
+Tensor linalg_cholesky(const Tensor & self, bool upper); // {"schema": "aten::linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_cholesky_out(const Tensor & self, bool upper, Tensor & out); // {"schema": "aten::linalg_cholesky.out(Tensor self, *, bool upper=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_cross(const Tensor & self, const Tensor & other, int64_t dim); // {"schema": "aten::linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linalg_cross_out(const Tensor & self, const Tensor & other, int64_t dim, Tensor & out); // {"schema": "aten::linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_lu_factor(const Tensor & A, bool pivot); // {"schema": "aten::linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_lu_factor_out(const Tensor & A, bool pivot, Tensor & LU, Tensor & pivots); // {"schema": "aten::linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> linalg_lu_factor_ex(const Tensor & A, bool pivot, bool check_errors); // {"schema": "aten::linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> linalg_lu_factor_ex_out(const Tensor & A, bool pivot, bool check_errors, Tensor & LU, Tensor & pivots, Tensor & info); // {"schema": "aten::linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> linalg_lu(const Tensor & A, bool pivot); // {"schema": "aten::linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> linalg_lu_out(const Tensor & A, bool pivot, Tensor & P, Tensor & L, Tensor & U); // {"schema": "aten::linalg_lu.out(Tensor A, *, bool pivot=True, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)", "dispatch": "True", "default": "False"}
+Tensor linalg_lu_solve(const Tensor & LU, const Tensor & pivots, const Tensor & B, bool left, bool adjoint); // {"schema": "aten::linalg_lu_solve(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linalg_lu_solve_out(const Tensor & LU, const Tensor & pivots, const Tensor & B, bool left, bool adjoint, Tensor & out); // {"schema": "aten::linalg_lu_solve.out(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _linalg_det(const Tensor & A); // {"schema": "aten::_linalg_det(Tensor A) -> (Tensor result, Tensor LU, Tensor pivots)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _linalg_det_out(const Tensor & A, Tensor & result, Tensor & LU, Tensor & pivots); // {"schema": "aten::_linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)", "dispatch": "True", "default": "False"}
+Tensor linalg_det(const Tensor & A); // {"schema": "aten::linalg_det(Tensor A) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_det_out(const Tensor & A, Tensor & out); // {"schema": "aten::linalg_det.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor det(const Tensor & self); // {"schema": "aten::det(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> linalg_ldl_factor_ex(const Tensor & self, bool hermitian, bool check_errors); // {"schema": "aten::linalg_ldl_factor_ex(Tensor self, *, bool hermitian=False, bool check_errors=False) -> (Tensor LD, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> linalg_ldl_factor_ex_out(const Tensor & self, bool hermitian, bool check_errors, Tensor & LD, Tensor & pivots, Tensor & info); // {"schema": "aten::linalg_ldl_factor_ex.out(Tensor self, *, bool hermitian=False, bool check_errors=False, Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_ldl_factor(const Tensor & self, bool hermitian); // {"schema": "aten::linalg_ldl_factor(Tensor self, *, bool hermitian=False) -> (Tensor LD, Tensor pivots)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_ldl_factor_out(const Tensor & self, bool hermitian, Tensor & LD, Tensor & pivots); // {"schema": "aten::linalg_ldl_factor.out(Tensor self, *, bool hermitian=False, Tensor(a!) LD, Tensor(b!) pivots) -> (Tensor(a!) LD, Tensor(b!) pivots)", "dispatch": "False", "default": "True"}
+Tensor linalg_ldl_solve(const Tensor & LD, const Tensor & pivots, const Tensor & B, bool hermitian); // {"schema": "aten::linalg_ldl_solve(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linalg_ldl_solve_out(const Tensor & LD, const Tensor & pivots, const Tensor & B, bool hermitian, Tensor & out); // {"schema": "aten::linalg_ldl_solve.out(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> linalg_lstsq(const Tensor & self, const Tensor & b, c10::optional<double> rcond, c10::optional<c10::string_view> driver); // {"schema": "aten::linalg_lstsq(Tensor self, Tensor b, float? rcond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> linalg_lstsq_out(const Tensor & self, const Tensor & b, c10::optional<double> rcond, c10::optional<c10::string_view> driver, Tensor & solution, Tensor & residuals, Tensor & rank, Tensor & singular_values); // {"schema": "aten::linalg_lstsq.out(Tensor self, Tensor b, float? rcond=None, *, str? driver=None, Tensor(a!) solution, Tensor(b!) residuals, Tensor(c!) rank, Tensor(d!) singular_values) -> (Tensor(a!) solution, Tensor(b!) residuals, Tensor(c!) rank, Tensor(d!) singular_values)", "dispatch": "True", "default": "False"}
+Tensor linalg_matmul(const Tensor & self, const Tensor & other); // {"schema": "aten::linalg_matmul(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_vecdot(const Tensor & x, const Tensor & y, int64_t dim); // {"schema": "aten::linalg_vecdot(Tensor x, Tensor y, *, int dim=-1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_vecdot_out(const Tensor & x, const Tensor & y, int64_t dim, Tensor & out); // {"schema": "aten::linalg_vecdot.out(Tensor x, Tensor y, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_exp(const Tensor & self); // {"schema": "aten::linalg_matrix_exp(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _linalg_slogdet(const Tensor & A); // {"schema": "aten::_linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet, Tensor LU, Tensor pivots)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> _linalg_slogdet_out(const Tensor & A, Tensor & sign, Tensor & logabsdet, Tensor & LU, Tensor & pivots); // {"schema": "aten::_linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_slogdet(const Tensor & A); // {"schema": "aten::linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_slogdet_out(const Tensor & A, Tensor & sign, Tensor & logabsdet); // {"schema": "aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> slogdet(const Tensor & self); // {"schema": "aten::slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> slogdet_out(const Tensor & self, Tensor & sign, Tensor & logabsdet); // {"schema": "aten::slogdet.out(Tensor self, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet)", "dispatch": "False", "default": "True"}
+Tensor logdet(const Tensor & self); // {"schema": "aten::logdet(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> linalg_eig(const Tensor & self); // {"schema": "aten::linalg_eig(Tensor self) -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor &,Tensor &> linalg_eig_out(const Tensor & self, Tensor & eigenvalues, Tensor & eigenvectors); // {"schema": "aten::linalg_eig.out(Tensor self, *, Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "True", "default": "False"}
+Tensor _linalg_eigvals(const Tensor & self); // {"schema": "aten::_linalg_eigvals(Tensor self) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor linalg_eigvals(const Tensor & self); // {"schema": "aten::linalg_eigvals(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_eigvals_out(const Tensor & self, Tensor & out); // {"schema": "aten::linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _linalg_eigh(const Tensor & A, c10::string_view UPLO, bool compute_v); // {"schema": "aten::_linalg_eigh(Tensor A, str UPLO=\"L\", bool compute_v=True) -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _linalg_eigh_out(const Tensor & A, c10::string_view UPLO, bool compute_v, Tensor & eigenvalues, Tensor & eigenvectors); // {"schema": "aten::_linalg_eigh.eigenvalues(Tensor A, str UPLO=\"L\", bool compute_v=True, *, Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_eigh(const Tensor & self, c10::string_view UPLO); // {"schema": "aten::linalg_eigh(Tensor self, str UPLO=\"L\") -> (Tensor eigenvalues, Tensor eigenvectors)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_eigh_out(const Tensor & self, c10::string_view UPLO, Tensor & eigvals, Tensor & eigvecs); // {"schema": "aten::linalg_eigh.eigvals(Tensor self, str UPLO=\"L\", *, Tensor(a!) eigvals, Tensor(b!) eigvecs) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)", "dispatch": "False", "default": "True"}
+Tensor linalg_eigvalsh(const Tensor & self, c10::string_view UPLO); // {"schema": "aten::linalg_eigvalsh(Tensor self, str UPLO=\"L\") -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_eigvalsh_out(const Tensor & self, c10::string_view UPLO, Tensor & out); // {"schema": "aten::linalg_eigvalsh.out(Tensor self, str UPLO=\"L\", *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_householder_product(const Tensor & input, const Tensor & tau); // {"schema": "aten::linalg_householder_product(Tensor input, Tensor tau) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & linalg_householder_product_out(const Tensor & input, const Tensor & tau, Tensor & out); // {"schema": "aten::linalg_householder_product.out(Tensor input, Tensor tau, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_inv_ex(const Tensor & A, bool check_errors); // {"schema": "aten::linalg_inv_ex(Tensor A, *, bool check_errors=False) -> (Tensor inverse, Tensor info)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_inv_ex_out(const Tensor & A, bool check_errors, Tensor & inverse, Tensor & info); // {"schema": "aten::linalg_inv_ex.inverse(Tensor A, *, bool check_errors=False, Tensor(a!) inverse, Tensor(b!) info) -> (Tensor(a!) inverse, Tensor(b!) info)", "dispatch": "True", "default": "False"}
+Tensor linalg_inv(const Tensor & A); // {"schema": "aten::linalg_inv(Tensor A) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_inv_out(const Tensor & A, Tensor & out); // {"schema": "aten::linalg_inv.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor inverse(const Tensor & self); // {"schema": "aten::inverse(Tensor self) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & inverse_out(const Tensor & self, Tensor & out); // {"schema": "aten::inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor inner(const Tensor & self, const Tensor & other); // {"schema": "aten::inner(Tensor self, Tensor other) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & inner_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::inner.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor outer(const Tensor & self, const Tensor & vec2); // {"schema": "aten::outer(Tensor self, Tensor vec2) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & outer_out(const Tensor & self, const Tensor & vec2, Tensor & out); // {"schema": "aten::outer.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
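The factorization and solve routines above return ::std::tuple values that match their schemas, so they compose naturally with structured bindings. A minimal sketch, assuming the at:: frontend and illustrative shapes (nothing here comes from this diff):

#include <ATen/ATen.h>

int main() {
  at::Tensor A = at::randn({3, 3});
  at::Tensor B = at::randn({3, 2});
  // linalg_lu_factor returns (LU, pivots), per the schema above.
  auto [LU, pivots] = at::linalg_lu_factor(A);
  // Solve A X = B from the factorization; left=True, adjoint=False defaults.
  at::Tensor X = at::linalg_lu_solve(LU, pivots, B);
  return 0;
}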
+Tensor ger(const Tensor & self, const Tensor & vec2); // {"schema": "aten::ger(Tensor self, Tensor vec2) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & ger_out(const Tensor & self, const Tensor & vec2, Tensor & out); // {"schema": "aten::ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_norm(const Tensor & self, const c10::optional<Scalar> & ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor linalg_norm(const Tensor & self, c10::string_view ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_norm_out(const Tensor & self, const c10::optional<Scalar> & ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & linalg_norm_out(const Tensor & self, c10::string_view ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_vector_norm(const Tensor & self, const Scalar & ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::linalg_vector_norm(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linalg_vector_norm_out(const Tensor & self, const Scalar & ord, OptionalIntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::linalg_vector_norm.out(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor linalg_matrix_norm(const Tensor & self, const Scalar & ord, IntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_norm_out(const Tensor & self, const Scalar & ord, IntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::linalg_matrix_norm.out(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_norm(const Tensor & self, c10::string_view ord, IntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype); // {"schema": "aten::linalg_matrix_norm.str_ord(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_norm_out(const Tensor & self, c10::string_view ord, IntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor> _linalg_svd(const Tensor & A, bool full_matrices, bool compute_uv, c10::optional<c10::string_view> driver); // {"schema": "aten::_linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True, *, str? driver=None) -> (Tensor U, Tensor S, Tensor Vh)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _linalg_svd_out(const Tensor & A, bool full_matrices, bool compute_uv, c10::optional<c10::string_view> driver, Tensor & U, Tensor & S, Tensor & Vh); // {"schema": "aten::_linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, str? driver=None, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> linalg_svd(const Tensor & A, bool full_matrices, c10::optional<c10::string_view> driver); // {"schema": "aten::linalg_svd(Tensor A, bool full_matrices=True, *, str? driver=None) -> (Tensor U, Tensor S, Tensor Vh)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> linalg_svd_out(const Tensor & A, bool full_matrices, c10::optional<c10::string_view> driver, Tensor & U, Tensor & S, Tensor & Vh); // {"schema": "aten::linalg_svd.U(Tensor A, bool full_matrices=True, *, str? driver=None, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)", "dispatch": "False", "default": "True"}
+Tensor linalg_svdvals(const Tensor & A, c10::optional<c10::string_view> driver); // {"schema": "aten::linalg_svdvals(Tensor A, *, str? driver=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_svdvals_out(const Tensor & A, c10::optional<c10::string_view> driver, Tensor & out); // {"schema": "aten::linalg_svdvals.out(Tensor A, *, str? driver=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_cond(const Tensor & self, const c10::optional<Scalar> & p); // {"schema": "aten::linalg_cond(Tensor self, Scalar? p=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_cond_out(const Tensor & self, const c10::optional<Scalar> & p, Tensor & out); // {"schema": "aten::linalg_cond.out(Tensor self, Scalar? p=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_cond(const Tensor & self, c10::string_view p); // {"schema": "aten::linalg_cond.p_str(Tensor self, str p) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_cond_out(const Tensor & self, c10::string_view p, Tensor & out); // {"schema": "aten::linalg_cond.p_str_out(Tensor self, str p, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_pinv(const Tensor & self, const c10::optional<Tensor> & atol, const c10::optional<Tensor> & rtol, bool hermitian); // {"schema": "aten::linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & linalg_pinv_out(const Tensor & self, const c10::optional<Tensor> & atol, const c10::optional<Tensor> & rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor linalg_pinv(const Tensor & self, c10::optional<double> atol, c10::optional<double> rtol, bool hermitian); // {"schema": "aten::linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_pinv_out(const Tensor & self, c10::optional<double> atol, c10::optional<double> rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_pinv(const Tensor & self, double rcond, bool hermitian); // {"schema": "aten::linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor linalg_pinv(const Tensor & self, const Tensor & rcond, bool hermitian); // {"schema": "aten::linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_pinv_out(const Tensor & self, double rcond, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor & linalg_pinv_out(const Tensor & self, const Tensor & rcond, bool hermitian, Tensor & out); // {"schema": "aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _linalg_solve_ex(const Tensor & A, const Tensor & B, bool left, bool check_errors); // {"schema": "aten::_linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor LU, Tensor pivots, Tensor info)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> _linalg_solve_ex_out(const Tensor & A, const Tensor & B, bool left, bool check_errors, Tensor & result, Tensor & LU, Tensor & pivots, Tensor & info); // {"schema": "aten::_linalg_solve_ex.result(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots, Tensor(d!) info) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots, Tensor(d!) info)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> linalg_solve_ex(const Tensor & A, const Tensor & B, bool left, bool check_errors); // {"schema": "aten::linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_solve_ex_out(const Tensor & A, const Tensor & B, bool left, bool check_errors, Tensor & result, Tensor & info); // {"schema": "aten::linalg_solve_ex.out(Tensor A, Tensor B, *, bool left=True, bool check_errors=False, Tensor(a!) result, Tensor(b!) info) -> (Tensor(a!) result, Tensor(b!) info)", "dispatch": "False", "default": "True"}
+Tensor linalg_solve(const Tensor & A, const Tensor & B, bool left); // {"schema": "aten::linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_solve_out(const Tensor & A, const Tensor & B, bool left, Tensor & out); // {"schema": "aten::linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
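A usage sketch for the norm, SVD, and pseudo-inverse entry points above, again through the at:: frontend; the shapes and the 1e-6 tolerance are illustrative assumptions:

#include <ATen/ATen.h>

int main() {
  at::Tensor A = at::randn({4, 3});
  // linalg_svd defaults to full_matrices=True; driver stays unset.
  auto [U, S, Vh] = at::linalg_svd(A);
  // Spectral norm: the Scalar-ord overload with ord=2.
  at::Tensor spec = at::linalg_matrix_norm(A, 2);
  // Pseudo-inverse via the float-rcond overload declared above.
  at::Tensor Apinv = at::linalg_pinv(A, /*rcond=*/1e-6);
  return 0;
}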
+Tensor linalg_tensorinv(const Tensor & self, int64_t ind); // {"schema": "aten::linalg_tensorinv(Tensor self, int ind=2) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_tensorinv_out(const Tensor & self, int64_t ind, Tensor & out); // {"schema": "aten::linalg_tensorinv.out(Tensor self, int ind=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_tensorsolve(const Tensor & self, const Tensor & other, OptionalIntArrayRef dims); // {"schema": "aten::linalg_tensorsolve(Tensor self, Tensor other, int[]? dims=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_tensorsolve_out(const Tensor & self, const Tensor & other, OptionalIntArrayRef dims, Tensor & out); // {"schema": "aten::linalg_tensorsolve.out(Tensor self, Tensor other, int[]? dims=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor> linalg_qr(const Tensor & A, c10::string_view mode); // {"schema": "aten::linalg_qr(Tensor A, str mode='reduced') -> (Tensor Q, Tensor R)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> linalg_qr_out(const Tensor & A, c10::string_view mode, Tensor & Q, Tensor & R); // {"schema": "aten::linalg_qr.out(Tensor A, str mode='reduced', *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)", "dispatch": "True", "default": "False"}
+Tensor linalg_matrix_power(const Tensor & self, int64_t n); // {"schema": "aten::linalg_matrix_power(Tensor self, int n) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_power_out(const Tensor & self, int64_t n, Tensor & out); // {"schema": "aten::linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_rank(const Tensor & input, const c10::optional<Tensor> & atol, const c10::optional<Tensor> & rtol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_rank_out(const Tensor & input, const c10::optional<Tensor> & atol, const c10::optional<Tensor> & rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_rank(const Tensor & self, c10::optional<double> atol, c10::optional<double> rtol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_rank_out(const Tensor & self, c10::optional<double> atol, c10::optional<double> rtol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_rank(const Tensor & self, double tol, bool hermitian); // {"schema": "aten::linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_rank_out(const Tensor & self, double tol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_matrix_rank(const Tensor & input, const Tensor & tol, bool hermitian); // {"schema": "aten::linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_matrix_rank_out(const Tensor & input, const Tensor & tol, bool hermitian, Tensor & out); // {"schema": "aten::linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor linalg_multi_dot(TensorList tensors); // {"schema": "aten::linalg_multi_dot(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor & linalg_multi_dot_out(TensorList tensors, Tensor & out); // {"schema": "aten::linalg_multi_dot.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "True"}
+Tensor nested_to_padded_tensor(const Tensor & self, double padding, OptionalIntArrayRef output_size); // {"schema": "aten::nested_to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_serialization_subcmul(const Tensor & self, const Tensor & other, const Scalar & alpha); // {"schema": "aten::_test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_parallel_materialize(const Tensor & self, int64_t num_parallel, bool skip_first); // {"schema": "aten::_test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _test_optional_intlist(const Tensor & values, OptionalIntArrayRef addends); // {"schema": "aten::_test_optional_intlist(Tensor values, int[]? addends) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _test_optional_filled_intlist(const Tensor & values, OptionalIntArrayRef addends); // {"schema": "aten::_test_optional_filled_intlist(Tensor values, int[2]? addends) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _test_optional_floatlist(const Tensor & values, c10::optional<ArrayRef<double>> addends); // {"schema": "aten::_test_optional_floatlist(Tensor values, float[]? addends) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _test_string_default(const Tensor & dummy, c10::string_view a, c10::string_view b); // {"schema": "aten::_test_string_default(Tensor dummy, str a=\"\\\"'\\\\\", str b='\"\\'\\\\') -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_ambiguous_defaults(const Tensor & dummy, int64_t a, int64_t b); // {"schema": "aten::_test_ambiguous_defaults.a(Tensor dummy, int a=1, int b=1) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_ambiguous_defaults(const Tensor & dummy, int64_t a, c10::string_view b); // {"schema": "aten::_test_ambiguous_defaults.b(Tensor dummy, int a=2, str b=\"2\") -> Tensor", "dispatch": "False", "default": "True"}
+Tensor _test_warn_in_autograd(const Tensor & self); // {"schema": "aten::_test_warn_in_autograd(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _test_autograd_multiple_dispatch(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch.fullcoverage(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _test_autograd_multiple_dispatch(const Tensor & self, bool b); // {"schema": "aten::_test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _test_autograd_multiple_dispatch_view(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)", "dispatch": "True", "default": "True"}
+Tensor _test_autograd_multiple_dispatch_view_copy(const Tensor & self); // {"schema": "aten::_test_autograd_multiple_dispatch_view_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor segment_reduce(const Tensor & data, c10::string_view reduce, const c10::optional<Tensor> & lengths, const c10::optional<Tensor> & indices, const c10::optional<Tensor> & offsets, int64_t axis, bool unsafe, const c10::optional<Scalar> & initial); // {"schema": "aten::segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, Tensor? offsets=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _segment_reduce_backward(const Tensor & grad, const Tensor & output, const Tensor & data, c10::string_view reduce, const c10::optional<Tensor> & lengths, const c10::optional<Tensor> & offsets, int64_t axis, const c10::optional<Scalar> & initial); // {"schema": "aten::_segment_reduce_backward(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor pad_sequence(TensorList sequences, bool batch_first, double padding_value); // {"schema": "aten::pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor", "dispatch": "False", "default": "True"}
+Tensor flatten_dense_tensors(TensorList tensors); // {"schema": "aten::flatten_dense_tensors(Tensor[] tensors) -> Tensor", "dispatch": "False", "default": "True"}
+::std::vector<Tensor> unflatten_dense_tensors(const Tensor & flat, TensorList tensors); // {"schema": "aten::unflatten_dense_tensors(Tensor flat, Tensor[] tensors) -> Tensor[]", "dispatch": "False", "default": "True"}
+Tensor _nested_tensor_from_tensor_list(TensorList list, c10::optional<ScalarType> dtype, c10::optional<Layout> layout, c10::optional<Device> device, c10::optional<bool> pin_memory); // {"schema": "aten::_nested_tensor_from_tensor_list(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor", "dispatch": "True", "default": "True"}
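A short sketch for the TensorList helpers above, assuming the at:: frontend; matrices and the tolerance are made up for illustration:

#include <ATen/ATen.h>

int main() {
  at::Tensor A = at::randn({2, 8});
  at::Tensor B = at::randn({8, 8});
  at::Tensor C = at::randn({8, 4});
  // linalg_multi_dot chooses the cheapest association order for the chain.
  at::Tensor chain = at::linalg_multi_dot({A, B, C});
  // Numerical rank via the float-tolerance overload declared above.
  at::Tensor rank = at::linalg_matrix_rank(B, /*tol=*/1e-6);
  return 0;
}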
+Tensor _fw_primal_copy(const Tensor & self, int64_t level); // {"schema": "aten::_fw_primal_copy(Tensor self, int level) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _make_dual_copy(const Tensor & primal, const Tensor & tangent, int64_t level); // {"schema": "aten::_make_dual_copy(Tensor primal, Tensor tangent, int level) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor view_as_real_copy(const Tensor & self); // {"schema": "aten::view_as_real_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor view_as_complex_copy(const Tensor & self); // {"schema": "aten::view_as_complex_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _conj_copy(const Tensor & self); // {"schema": "aten::_conj_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _neg_view_copy(const Tensor & self); // {"schema": "aten::_neg_view_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor as_strided_copy(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset); // {"schema": "aten::as_strided_copy(Tensor self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _sparse_broadcast_to_copy(const Tensor & self, IntArrayRef size); // {"schema": "aten::_sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor diagonal_copy(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); // {"schema": "aten::diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor expand_copy(const Tensor & self, c10::SymIntArrayRef size, bool implicit); // {"schema": "aten::expand_copy(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor permute_copy(const Tensor & self, IntArrayRef dims); // {"schema": "aten::permute_copy(Tensor self, int[] dims) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _reshape_alias_copy(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::_reshape_alias_copy(Tensor self, SymInt[] size, SymInt[] stride) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor select_copy(const Tensor & self, int64_t dim, c10::SymInt index); // {"schema": "aten::select_copy.int(Tensor self, int dim, SymInt index) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor detach_copy(const Tensor & self); // {"schema": "aten::detach_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor slice_copy(const Tensor & self, int64_t dim, c10::optional<c10::SymInt> start, c10::optional<c10::SymInt> end, c10::SymInt step); // {"schema": "aten::slice_copy.Tensor(Tensor self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> split_copy(const Tensor & self, c10::SymInt split_size, int64_t dim); // {"schema": "aten::split_copy.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> split_with_sizes_copy(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim); // {"schema": "aten::split_with_sizes_copy(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"}
+Tensor squeeze_copy(const Tensor & self); // {"schema": "aten::squeeze_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor squeeze_copy(const Tensor & self, int64_t dim); // {"schema": "aten::squeeze_copy.dim(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor squeeze_copy(const Tensor & self, IntArrayRef dim); // {"schema": "aten::squeeze_copy.dims(Tensor self, int[] dim) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor t_copy(const Tensor & self); // {"schema": "aten::t_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor transpose_copy(const Tensor & self, int64_t dim0, int64_t dim1); // {"schema": "aten::transpose_copy.int(Tensor self, int dim0, int dim1) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor unsqueeze_copy(const Tensor & self, int64_t dim); // {"schema": "aten::unsqueeze_copy(Tensor self, int dim) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _indices_copy(const Tensor & self); // {"schema": "aten::_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor _values_copy(const Tensor & self); // {"schema": "aten::_values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor indices_copy(const Tensor & self); // {"schema": "aten::indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor values_copy(const Tensor & self); // {"schema": "aten::values_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor crow_indices_copy(const Tensor & self); // {"schema": "aten::crow_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor col_indices_copy(const Tensor & self); // {"schema": "aten::col_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor ccol_indices_copy(const Tensor & self); // {"schema": "aten::ccol_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor row_indices_copy(const Tensor & self); // {"schema": "aten::row_indices_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+::std::vector<Tensor> unbind_copy(const Tensor & self, int64_t dim); // {"schema": "aten::unbind_copy.int(Tensor self, int dim=0) -> Tensor[]", "dispatch": "True", "default": "True"}
+void unbind_copy_out(const Tensor & self, int64_t dim, TensorList out); // {"schema": "aten::unbind_copy.int_out(Tensor self, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void split_copy_out(const Tensor & self, c10::SymInt split_size, int64_t dim, TensorList out); // {"schema": "aten::split_copy.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void split_with_sizes_copy_out(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim, TensorList out); // {"schema": "aten::split_with_sizes_copy.out(Tensor self, SymInt[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+Tensor view_copy(const Tensor & self, c10::SymIntArrayRef size); // {"schema": "aten::view_copy(Tensor self, SymInt[] size) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor view_copy(const Tensor & self, ScalarType dtype); // {"schema": "aten::view_copy.dtype(Tensor self, ScalarType dtype) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor unfold_copy(const Tensor & self, int64_t dimension, int64_t size, int64_t step); // {"schema": "aten::unfold_copy(Tensor self, int dimension, int size, int step) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor alias_copy(const Tensor & self); // {"schema": "aten::alias_copy(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor to_padded_tensor(const Tensor & self, double padding, OptionalSymIntArrayRef output_size); // {"schema": "aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _nested_tensor_softmax_with_shape(const Tensor & self, const Tensor & query); // {"schema": "aten::_nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor _transformer_encoder_layer_fwd(const Tensor & src, int64_t embed_dim, int64_t num_heads, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const Tensor & norm_weight_1, const Tensor & norm_bias_1, const Tensor & norm_weight_2, const Tensor & norm_bias_2, const Tensor & ffn_weight_1, const Tensor & ffn_bias_1, const Tensor & ffn_weight_2, const Tensor & ffn_bias_2, const c10::optional<Tensor> & mask, c10::optional<int64_t> mask_type); // {"schema": "aten::_transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _native_multi_head_attention(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const c10::optional<Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type); // {"schema": "aten::_native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor scaled_dot_product_attention(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & attn_mask, double dropout_p, bool is_causal, c10::optional<double> scale); // {"schema": "aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor", "dispatch": "False", "default": "True"}
+int64_t _fused_sdp_choice(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & attn_mask, double dropout_p, bool is_causal, c10::optional<double> scale); // {"schema": "aten::_fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _scaled_dot_product_attention_math(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & attn_mask, double dropout_p, bool is_causal, const c10::optional<Tensor> & dropout_mask, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)", "dispatch": "False", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,c10::SymInt,c10::SymInt,Tensor,Tensor,Tensor> _scaled_dot_product_flash_attention(const Tensor & query, const Tensor & key, const Tensor & value, double dropout_p, bool is_causal, bool return_debug_mask, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor> _scaled_dot_product_flash_attention_for_cpu(const Tensor & query, const Tensor & key, const Tensor & value, double dropout_p, bool is_causal, const c10::optional<Tensor> & attn_mask, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _scaled_dot_product_flash_attention_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const Tensor & philox_seed, const Tensor & philox_offset, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _scaled_dot_product_flash_attention_for_cpu_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, double dropout_p, bool is_causal, const c10::optional<Tensor> & attn_mask, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _scaled_dot_product_efficient_attention(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & attn_bias, bool compute_log_sumexp, double dropout_p, bool is_causal, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _scaled_dot_product_efficient_attention_backward(const Tensor & grad_out_, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & attn_bias, const Tensor & out, const Tensor & logsumexp, const Tensor & philox_seed, const Tensor & philox_offset, double dropout_p, ::std::array<bool,4> grad_input_mask, bool is_causal, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor attn_bias, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, float dropout_p, bool[4] grad_input_mask, bool is_causal=False, *, float? scale=None) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _scaled_dot_product_cudnn_attention(const Tensor & query, const Tensor & key, const Tensor & value, double dropout_p, bool is_causal, bool return_debug_mask, c10::optional<double> scale); // {"schema": "aten::_scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,Tensor> _flash_attention_forward(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & cum_seq_q, const c10::optional<Tensor> & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, bool return_debug_mask, c10::optional<double> scale); // {"schema": "aten::_flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor> _flash_attention_backward(const Tensor & grad_out, const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & out, const Tensor & logsumexp, const Tensor & cum_seq_q, const Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const Tensor & philox_seed, const Tensor & philox_offset, c10::optional<double> scale); // {"schema": "aten::_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,c10::SymInt,c10::SymInt> _efficient_attention_forward(const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & cu_seqlens_q, const c10::optional<Tensor> & cu_seqlens_k, c10::optional<int64_t> max_seqlen_q, c10::optional<int64_t> max_seqlen_k, double dropout_p, int64_t custom_mask_type, bool compute_log_sumexp, c10::optional<double> scale, const c10::optional<Tensor> & causal_diagonal, const c10::optional<Tensor> & seqlen_k); // {"schema": "aten::_efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)", "dispatch": "True", "default": "False"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor> _efficient_attention_backward(const Tensor & grad_out_, const Tensor & query, const Tensor & key, const Tensor & value, const c10::optional<Tensor> & bias, const Tensor & out, const c10::optional<Tensor> & cu_seqlens_q, const c10::optional<Tensor> & cu_seqlens_k, c10::SymInt max_seqlen_q, c10::SymInt max_seqlen_k, const Tensor & logsumexp, double dropout_p, const Tensor & philox_seed, const Tensor & philox_offset, int64_t custom_mask_type, bool bias_requires_grad, c10::optional<double> scale, c10::optional<int64_t> num_splits_key); // {"schema": "aten::_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)", "dispatch": "True", "default": "False"}
+Tensor _triton_scaled_dot_attention(const Tensor & q, const Tensor & k, const Tensor & v, double dropout_p); // {"schema": "aten::_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor", "dispatch": "True", "default": "False"}
+Tensor & _fill_mem_eff_dropout_mask_(Tensor & self, double dropout_p, int64_t seed, int64_t offset); // {"schema": "aten::_fill_mem_eff_dropout_mask_(Tensor(a!) self, float dropout_p, int seed, int offset) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor _triton_multi_head_attention(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const c10::optional<Tensor> & mask); // {"schema": "aten::_triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor", "dispatch": "True", "default": "False"}
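scaled_dot_product_attention is the composite entry point; per the schemas above it dispatches to a flash, memory-efficient, cuDNN, or math backend. A minimal sketch, assuming the at:: frontend; the (batch, heads, seq_len, head_dim) shapes are illustrative:

#include <ATen/ATen.h>

int main() {
  at::Tensor q = at::randn({2, 4, 16, 8});
  at::Tensor k = at::randn({2, 4, 16, 8});
  at::Tensor v = at::randn({2, 4, 16, 8});
  // No explicit mask; causal masking is requested via is_causal instead.
  at::Tensor out = at::scaled_dot_product_attention(
      q, k, v, /*attn_mask=*/{}, /*dropout_p=*/0.0, /*is_causal=*/true);
  return 0;
}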
+Tensor special_airy_ai(const Tensor & x); // {"schema": "aten::special_airy_ai(Tensor x) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_airy_ai_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_airy_ai.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor special_bessel_j0(const Tensor & self); // {"schema": "aten::special_bessel_j0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_bessel_j0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor special_bessel_j1(const Tensor & self); // {"schema": "aten::special_bessel_j1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_bessel_j1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_j1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor special_bessel_y0(const Tensor & self); // {"schema": "aten::special_bessel_y0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_bessel_y0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor special_bessel_y1(const Tensor & self); // {"schema": "aten::special_bessel_y1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_bessel_y1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor special_chebyshev_polynomial_t(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_t(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_t(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_t_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_chebyshev_polynomial_t_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_t_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_t.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_u(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_u(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_u(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_u_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_chebyshev_polynomial_u_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_u_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_u.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_v(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_v(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_v(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_v_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_chebyshev_polynomial_v_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_v_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_w(const Tensor & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_w(const Scalar & x, const Tensor & n); // {"schema": "aten::special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_chebyshev_polynomial_w(const Tensor & x, const Scalar & n); // {"schema": "aten::special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_w_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_chebyshev_polynomial_w_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_chebyshev_polynomial_w_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_h(const Tensor & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_h(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_h(const Scalar & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_h(const Tensor & x, const Scalar & n); // {"schema": "aten::special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_hermite_polynomial_h_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_hermite_polynomial_h_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_hermite_polynomial_h_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_h.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_he(const Tensor & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_he(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_he(const Scalar & x, const Tensor & n); // {"schema": "aten::special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_hermite_polynomial_he(const Tensor & x, const Scalar & n); // {"schema": "aten::special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_hermite_polynomial_he_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_hermite_polynomial_he_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_hermite_polynomial_he_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_hermite_polynomial_he.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor special_laguerre_polynomial_l(const Tensor & x, const Tensor & n); // {"schema": "aten::special_laguerre_polynomial_l(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_laguerre_polynomial_l(const Scalar & x, const Tensor & n); // {"schema": "aten::special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor special_laguerre_polynomial_l(const Tensor & x, const Scalar & n); // {"schema": "aten::special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & special_laguerre_polynomial_l_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+Tensor & special_laguerre_polynomial_l_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & special_laguerre_polynomial_l_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_laguerre_polynomial_l.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Tensor & x, const Tensor & n); // {"schema": "aten::special_legendre_polynomial_p(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Scalar & x, const Tensor & n); // {"schema": "aten::special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_legendre_polynomial_p(const Tensor & x, const Scalar & n); // {"schema": "aten::special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_legendre_polynomial_p_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_legendre_polynomial_p_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_legendre_polynomial_p_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_legendre_polynomial_p.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_modified_bessel_i0(const Tensor & self); // {"schema": "aten::special_modified_bessel_i0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_i0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_i1(const Tensor & self); // {"schema": "aten::special_modified_bessel_i1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_i1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_k0(const Tensor & self); // {"schema": "aten::special_modified_bessel_k0(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_k0_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_modified_bessel_k1(const Tensor & self); // {"schema": "aten::special_modified_bessel_k1(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_modified_bessel_k1_out(const Tensor & self, Tensor & out); // {"schema": "aten::special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_scaled_modified_bessel_k0(const Tensor & x); // {"schema": "aten::special_scaled_modified_bessel_k0(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_scaled_modified_bessel_k0_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_scaled_modified_bessel_k1(const Tensor & x); // {"schema": "aten::special_scaled_modified_bessel_k1(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_scaled_modified_bessel_k1_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor special_shifted_chebyshev_polynomial_t(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_t(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_t(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_t_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_t.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_u(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_u_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_u.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_v(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_v_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Tensor & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Scalar & x, const Tensor & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor special_shifted_chebyshev_polynomial_w(const Tensor & x, const Scalar & n); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Tensor & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Scalar & x, const Tensor & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & special_shifted_chebyshev_polynomial_w_out(const Tensor & x, const Scalar & n, Tensor & out); // {"schema": "aten::special_shifted_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor special_spherical_bessel_j0(const Tensor & x); // {"schema": "aten::special_spherical_bessel_j0(Tensor x) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & special_spherical_bessel_j0_out(const Tensor & x, Tensor & out); // {"schema": "aten::special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "False"} +Tensor _foobar(const Tensor & self, bool arg1, bool arg2, bool arg3); // {"schema": "aten::_foobar(Tensor self, bool arg1=True, bool arg2=True, *, bool arg3=True) -> Tensor", "dispatch": "True", "default": "False"} +void _fused_adam_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adam_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adamw_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()", "dispatch": "True", "default": "False"} +void _fused_adamw_(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? 
+void _propagate_xla_data(const Tensor & input, const Tensor & output); // {"schema": "aten::_propagate_xla_data(Tensor input, Tensor output) -> ()", "dispatch": "False", "default": "True"} +Tensor & _new_zeros_with_same_feature_meta_out(const Tensor & self, const Tensor & other, int64_t self_num_batch_dims, Tensor & out); // {"schema": "aten::_new_zeros_with_same_feature_meta.out(Tensor self, Tensor other, *, int self_num_batch_dims=0, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> _cudnn_ctc_loss_out(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool deterministic, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_cudnn_ctc_loss.out(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _cudnn_rnn_flatten_weight_out(TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, Tensor & out); // {"schema": "aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &,Tensor &> _cudnn_rnn_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const c10::optional<Tensor> & weight_buf, const Tensor & hx, const c10::optional<Tensor> & cx, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::_cudnn_rnn.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"} +void _cudnn_rnn_backward_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const c10::optional<Tensor> & cx, const Tensor & output, const c10::optional<Tensor> & grad_output, const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state, const Tensor & reserve, ::std::array<bool,4> output_mask, Tensor & out0, Tensor & out1, Tensor & out2, TensorList out3); // {"schema": "aten::_cudnn_rnn_backward.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!)[] out3) -> ()", "dispatch": "True", "default": "True"} +Tensor & _cudnn_init_dropout_state_out(double dropout, bool train, int64_t dropout_seed, Tensor & out); // {"schema": "aten::_cudnn_init_dropout_state.out(float dropout, bool train, int dropout_seed, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _fused_dropout_out(const Tensor & self, double p, c10::optional<Generator> generator, Tensor & out0, Tensor & out1); // {"schema": "aten::_fused_dropout.out(Tensor self, float p, Generator? generator=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _masked_scale_out(const Tensor & self, const Tensor & mask, double scale, Tensor & out); // {"schema": "aten::_masked_scale.out(Tensor self, Tensor mask, float scale, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> native_dropout_out(const Tensor & input, double p, c10::optional<bool> train, Tensor & out0, Tensor & out1); // {"schema": "aten::native_dropout.out(Tensor input, float p, bool? train, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & native_dropout_backward_out(const Tensor & grad_output, const Tensor & mask, double scale, Tensor & out); // {"schema": "aten::native_dropout_backward.out(Tensor grad_output, Tensor mask, float scale, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
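// Illustrative sketch: aten::native_dropout returns both the scaled result and
// the keep-mask, so native_dropout_backward can rescale gradients with the
// same mask (scale = 1/(1-p)). A minimal example via the functional C++ API,
// reusing y as a stand-in grad_output:
#include <ATen/ATen.h>

void native_dropout_demo() {
  at::Tensor x = at::randn({2, 3});
  auto [y, mask] = at::native_dropout(x, /*p=*/0.5, /*train=*/true);
  at::Tensor grad_in = at::native_dropout_backward(y, mask, /*scale=*/2.0);
}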
+Tensor & _conj_physical_out(const Tensor & self, Tensor & out); // {"schema": "aten::_conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _add_relu_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & add_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::add.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & affine_grid_generator_out(const Tensor & theta, c10::SymIntArrayRef size, bool align_corners, Tensor & out); // {"schema": "aten::affine_grid_generator.out(Tensor theta, SymInt[] size, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_functorch_fallback_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::_test_functorch_fallback.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bartlett_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::bartlett_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bartlett_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::bartlett_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantized_batch_norm_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & mean, const Tensor & var, double eps, double output_scale, int64_t output_zero_point, Tensor & out); // {"schema": "aten::quantized_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, const Tensor & p, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::bernoulli.Tensor_out(Tensor self, Tensor p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor bernoulli(const Tensor & self, const Tensor & p, c10::optional<Generator> generator); // {"schema": "aten::bernoulli.Tensor(Tensor self, Tensor p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & bernoulli_out(const Tensor & self, double p, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::bernoulli.float_out(Tensor self, float p=0.5, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & binary_cross_entropy_with_logits_out(const Tensor & self, const Tensor & target, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & pos_weight, int64_t reduction, Tensor & out); // {"schema": "aten::binary_cross_entropy_with_logits.out(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bincount_out(const Tensor & self, const c10::optional<Tensor> & weights, int64_t minlength, Tensor & out); // {"schema": "aten::bincount.out(Tensor self, Tensor? weights=None, int minlength=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
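// Illustrative sketch: `Generator?` in a schema lowers to
// c10::optional<at::Generator> in C++; passing c10::nullopt selects the
// global default RNG. A hedged example of the bernoulli variants above:
#include <ATen/ATen.h>

void bernoulli_demo() {
  at::Tensor probs = at::full({4}, 0.25);     // per-element probabilities
  at::Tensor draws = at::bernoulli(probs);    // default generator
  at::Tensor out = at::empty_like(probs);
  at::bernoulli_out(out, probs, /*generator=*/c10::nullopt);
}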
+Tensor & blackman_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & blackman_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & block_diag_out(TensorList tensors, Tensor & out); // {"schema": "aten::block_diag.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & constant_pad_nd_out(const Tensor & self, c10::SymIntArrayRef pad, const Scalar & value, Tensor & out); // {"schema": "aten::constant_pad_nd.out(Tensor self, SymInt[] pad, Scalar value=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & convolution_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, Tensor & out); // {"schema": "aten::convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> convolution_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, OptionalSymIntArrayRef bias_sizes, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::convolution_backward.out(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & convolution_overrideable_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, Tensor & out); // {"schema": "aten::convolution_overrideable.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> convolution_backward_overrideable_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::convolution_backward_overrideable.out(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
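// Illustrative sketch: the `bool[3] output_mask` argument of
// aten::convolution_backward selects which of (grad_input, grad_weight,
// grad_bias) are computed; unselected slots come back as undefined tensors.
#include <ATen/ATen.h>

void conv_backward_demo(const at::Tensor& grad_out, const at::Tensor& input,
                        const at::Tensor& weight) {
  auto [grad_input, grad_weight, grad_bias] = at::convolution_backward(
      grad_out, input, weight, /*bias_sizes=*/c10::nullopt,
      /*stride=*/{1, 1}, /*padding=*/{0, 0}, /*dilation=*/{1, 1},
      /*transposed=*/false, /*output_padding=*/{0, 0}, /*groups=*/1,
      /*output_mask=*/{true, true, false});  // skip grad_bias
}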
+Tensor & _convolution_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, Tensor & out); // {"schema": "aten::_convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & conv_tbc_out(const Tensor & self, const Tensor & weight, const Tensor & bias, int64_t pad, Tensor & out); // {"schema": "aten::conv_tbc.out(Tensor self, Tensor weight, Tensor bias, int pad=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & copy_out(const Tensor & self, const Tensor & src, bool non_blocking, Tensor & out); // {"schema": "aten::copy.out(Tensor self, Tensor src, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _copy_from_out(const Tensor & self, const Tensor & dst, bool non_blocking, Tensor & out); // {"schema": "aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _copy_from_and_resize_out(const Tensor & self, const Tensor & dst, Tensor & out); // {"schema": "aten::_copy_from_and_resize.out(Tensor self, Tensor dst, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & count_nonzero_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::count_nonzero.dim_IntList_out(Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & count_nonzero_out(const Tensor & self, c10::optional<int64_t> dim, Tensor & out); // {"schema": "aten::count_nonzero.out(Tensor self, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
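// Illustrative sketch: `int? dim=None` lowers to c10::optional<int64_t>, which
// is what distinguishes the count_nonzero overload above from the
// IntArrayRef (`int[] dim`) variant:
#include <ATen/ATen.h>

void count_nonzero_demo(const at::Tensor& x) {
  at::Tensor total = at::count_nonzero(x);                        // dim=None
  at::Tensor rows = at::count_nonzero(x, c10::optional<int64_t>(0));
  at::Tensor both = at::count_nonzero(x, at::IntArrayRef{0, 1});  // int[] overload
}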
+Tensor & cudnn_affine_grid_generator_out(const Tensor & theta, int64_t N, int64_t C, int64_t H, int64_t W, Tensor & out); // {"schema": "aten::cudnn_affine_grid_generator.out(Tensor theta, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_affine_grid_generator_backward_out(const Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W, Tensor & out); // {"schema": "aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> cudnn_batch_norm_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, bool training, double exponential_average_factor, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::cudnn_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> cudnn_batch_norm_backward_out(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, const c10::optional<Tensor> & save_mean, const c10::optional<Tensor> & save_var, double epsilon, const Tensor & reserveSpace, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::cudnn_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_transpose_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, Tensor & out); // {"schema": "aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _mps_convolution_transpose_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> mps_convolution_transpose_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array<bool,2> output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::mps_convolution_transpose_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_relu_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::cudnn_convolution_relu.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & cudnn_convolution_add_relu_out(const Tensor & self, const Tensor & weight, const Tensor & z, const c10::optional<Scalar> & alpha, const c10::optional<Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::cudnn_convolution_add_relu.out(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & cudnn_grid_sampler_out(const Tensor & self, const Tensor & grid, Tensor & out); // {"schema": "aten::cudnn_grid_sampler.out(Tensor self, Tensor grid, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> cudnn_grid_sampler_backward_out(const Tensor & self, const Tensor & grid, const Tensor & grad_output, Tensor & out0, Tensor & out1); // {"schema": "aten::cudnn_grid_sampler_backward.out(Tensor self, Tensor grid, Tensor grad_output, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> _ctc_loss_out(const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_ctc_loss.out(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> _ctc_loss_out(const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, bool zero_infinity, Tensor & out0, Tensor & out1); // {"schema": "aten::_ctc_loss.Tensor_out(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, bool zero_infinity=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _ctc_loss_backward_out(const Tensor & grad, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity, Tensor & out); // {"schema": "aten::_ctc_loss_backward.out(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diag_embed_out(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diag_embed.out(Tensor self, int offset=0, int dim1=-2, int dim2=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diagonal_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_backward.out(Tensor grad_output, SymInt[] input_sizes, int offset, int dim1, int dim2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::div.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & div_out(const Tensor & self, const Scalar & other, c10::optional<c10::string_view> rounding_mode, Tensor & out); // {"schema": "aten::div.Scalar_mode_out(Tensor self, Scalar other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
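// Illustrative sketch: `str? rounding_mode` lowers to
// c10::optional<c10::string_view>; "trunc" and "floor" mirror Python's
// torch.div(..., rounding_mode=...):
#include <ATen/ATen.h>

void div_rounding_demo(const at::Tensor& x) {
  at::Tensor exact = at::div(x, 3);                      // true division
  at::Tensor truncated = at::div(x, 3, /*rounding_mode=*/"trunc");
  at::Tensor floored = at::div(x, 3, /*rounding_mode=*/"floor");
}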
+Tensor & embedding_out(const Tensor & weight, const Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse, Tensor & out); // {"schema": "aten::embedding.out(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & embedding_dense_backward_out(const Tensor & grad_output, const Tensor & indices, c10::SymInt num_weights, c10::SymInt padding_idx, bool scale_grad_by_freq, Tensor & out); // {"schema": "aten::embedding_dense_backward.out(Tensor grad_output, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & embedding_renorm_out(const Tensor & self, const Tensor & indices, double max_norm, double norm_type, Tensor & out); // {"schema": "aten::embedding_renorm.out(Tensor self, Tensor indices, float max_norm, float norm_type, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor embedding_renorm(const Tensor & self, const Tensor & indices, double max_norm, double norm_type); // {"schema": "aten::embedding_renorm(Tensor self, Tensor indices, float max_norm, float norm_type) -> Tensor", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> _embedding_bag_forward_only_out(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional<Tensor> & per_sample_weights, bool include_last_offset, int64_t padding_idx, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::_embedding_bag_forward_only.out(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> _embedding_bag_out(const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional<Tensor> & per_sample_weights, bool include_last_offset, int64_t padding_idx, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::_embedding_bag.out(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"} +Tensor & _embedding_bag_dense_backward_out(const Tensor & grad, const Tensor & indices, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const c10::optional<Tensor> & per_sample_weights, int64_t padding_idx, Tensor & out); // {"schema": "aten::_embedding_bag_dense_backward.out(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _embedding_bag_per_sample_weights_backward_out(const Tensor & grad, const Tensor & weight, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, int64_t mode, int64_t padding_idx, Tensor & out); // {"schema": "aten::_embedding_bag_per_sample_weights_backward.out(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
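// Illustrative sketch: an embedding lookup is a row gather from `weight`;
// rows equal to padding_idx receive zero gradient in the backward op above.
#include <ATen/ATen.h>

void embedding_demo() {
  at::Tensor weight = at::randn({10, 4});            // 10 embeddings of dim 4
  at::Tensor indices = at::randint(/*high=*/10, {6});
  at::Tensor rows = at::embedding(weight, indices);  // shape [6, 4]
}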
+Tensor & empty_out(IntArrayRef size, c10::optional<DimnameList> names, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::empty.names_out(int[] size, *, Dimname[]? names, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_permuted_out(c10::SymIntArrayRef size, IntArrayRef physical_layout, Tensor & out); // {"schema": "aten::empty_permuted.out(SymInt[] size, int[] physical_layout, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_empty_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_empty.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_empty_strided_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::new_empty_strided.out(Tensor self, SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_full_out(const Tensor & self, c10::SymIntArrayRef size, const Scalar & fill_value, Tensor & out); // {"schema": "aten::new_full.out(Tensor self, SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_zeros_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_zeros.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & new_ones_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::new_ones.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _empty_affine_quantized_out(c10::SymIntArrayRef size, double scale, int64_t zero_point, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::_empty_affine_quantized.out(SymInt[] size, *, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _empty_per_channel_affine_quantized_out(c10::SymIntArrayRef size, const Tensor & scales, const Tensor & zero_points, int64_t axis, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::_empty_per_channel_affine_quantized.out(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, MemoryFormat? memory_format=contiguous_format, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_out(const Tensor & self, c10::SymIntArrayRef size, c10::optional<MemoryFormat> memory_format, const Tensor & out); // {"schema": "aten::resize.out(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize(const Tensor & self, c10::SymIntArrayRef size, c10::optional<MemoryFormat> memory_format); // {"schema": "aten::resize(Tensor self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & _resize_output_out(const Tensor & self, c10::SymIntArrayRef size, Device device, const Tensor & out); // {"schema": "aten::_resize_output.out(Tensor self, SymInt[] size, Device device, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _resize_output(const Tensor & self, c10::SymIntArrayRef size, Device device); // {"schema": "aten::_resize_output(Tensor self, SymInt[] size, Device device) -> Tensor", "dispatch": "True", "default": "True"}
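// Illustrative sketch: `MemoryFormat?` lowers to c10::optional<at::MemoryFormat>,
// and the new_* factories inherit dtype and device from `self`:
#include <ATen/ATen.h>

void factory_demo(const at::Tensor& self) {
  at::Tensor a = self.new_empty({2, 3});            // same dtype/device as self
  at::Tensor b = self.new_full({2, 3}, /*fill_value=*/1.5);
  at::Tensor c = at::empty_like(self, at::MemoryFormat::Preserve);
}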
+Tensor & empty_quantized_out(IntArrayRef size, const Tensor & qtensor, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::empty_quantized.out(int[] size, Tensor qtensor, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_like_out(const Tensor & self, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::empty_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & empty_strided_out(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & fill_out(const Tensor & self, const Scalar & value, Tensor & out); // {"schema": "aten::fill.Scalar_out(Tensor self, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & fill_out(const Tensor & self, const Tensor & value, Tensor & out); // {"schema": "aten::fill.Tensor_out(Tensor self, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & floor_divide_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::floor_divide.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & full_out(IntArrayRef size, const Scalar & fill_value, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::full.names_out(int[] size, Scalar fill_value, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & full_like_out(const Tensor & self, const Scalar & fill_value, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::full_like.out(Tensor self, Scalar fill_value, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & from_file_out(c10::string_view filename, c10::optional<bool> shared, c10::optional<int64_t> size, Tensor & out); // {"schema": "aten::from_file.out(str filename, bool? shared=None, int? size=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & grid_sampler_2d_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::grid_sampler_2d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> grid_sampler_2d_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array<bool,2> output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::grid_sampler_2d_backward.out(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+Tensor & _grid_sampler_2d_cpu_fallback_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::_grid_sampler_2d_cpu_fallback.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & grid_sampler_3d_out(const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, Tensor & out); // {"schema": "aten::grid_sampler_3d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &> grid_sampler_3d_backward_out(const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array<bool,2> output_mask, Tensor & out0, Tensor & out1); // {"schema": "aten::grid_sampler_3d_backward.out(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & hann_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hann_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::hamming_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::hamming_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, double alpha, Tensor & out); // {"schema": "aten::hamming_window.periodic_alpha_out(int window_length, bool periodic, float alpha, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hamming_window_out(int64_t window_length, bool periodic, double alpha, double beta, Tensor & out); // {"schema": "aten::hamming_window.periodic_alpha_beta_out(int window_length, bool periodic, float alpha, float beta, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, Tensor & out); // {"schema": "aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, bool periodic, Tensor & out); // {"schema": "aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & kaiser_window_out(int64_t window_length, bool periodic, double beta, Tensor & out); // {"schema": "aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
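// Illustrative sketch: the window factories above differ only in their extra
// shape parameters; periodic=true drops the final symmetric sample so frames
// tile cleanly for STFT use:
#include <ATen/ATen.h>

void window_demo() {
  at::Tensor hann = at::hann_window(64);                          // periodic
  at::Tensor hann_sym = at::hann_window(64, /*periodic=*/false);  // symmetric
  at::Tensor kaiser = at::kaiser_window(64, /*periodic=*/true, /*beta=*/8.0);
}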
+::std::tuple<Tensor &,Tensor &,Tensor &> native_group_norm_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> native_group_norm_backward_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & rstd, const c10::optional<Tensor> & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_group_norm_backward.out(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, SymInt N, SymInt C, SymInt HxW, int group, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & index_put_out(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate, Tensor & out); // {"schema": "aten::index_put.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _index_put_impl_out(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate, bool unsafe, Tensor & out); // {"schema": "aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _index_put_impl(const Tensor & self, const c10::List<c10::optional<Tensor>> & indices, const Tensor & values, bool accumulate, bool unsafe); // {"schema": "aten::_index_put_impl(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & isnan_out(const Tensor & self, Tensor & out); // {"schema": "aten::isnan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> native_layer_norm_out(const Tensor & input, c10::SymIntArrayRef normalized_shape, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_layer_norm.out(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +::std::tuple<Tensor &,Tensor &,Tensor &> native_layer_norm_backward_out(const Tensor & grad_out, const Tensor & input, c10::SymIntArrayRef normalized_shape, const Tensor & mean, const Tensor & rstd, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_layer_norm_backward.out(Tensor grad_out, Tensor input, SymInt[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
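// Illustrative sketch: `Tensor?[] indices` lowers to
// c10::List<c10::optional<Tensor>>; a nullopt entry means "take the whole
// dimension", like `:` in Python indexing. Assumes self is 2-D and values
// broadcasts to the selected rows:
#include <ATen/ATen.h>
#include <ATen/core/List.h>

void index_put_demo(at::Tensor& self, const at::Tensor& values) {
  c10::List<c10::optional<at::Tensor>> indices;
  indices.push_back(at::randint(self.size(0), {3}));  // rows to write
  indices.push_back(c10::nullopt);                    // all columns, like `:`
  at::index_put_(self, indices, values, /*accumulate=*/false);
}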
+::std::tuple<Tensor &,Tensor &,Tensor &> linear_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::linear_backward.out(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_linear_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, Tensor & out); // {"schema": "aten::mkldnn_linear.out(Tensor self, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_linear_backward_input_out(IntArrayRef input_size, const Tensor & grad_output, const Tensor & weight, Tensor & out); // {"schema": "aten::mkldnn_linear_backward_input.out(int[] input_size, Tensor grad_output, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> mkldnn_linear_backward_weights_out(const Tensor & grad_output, const Tensor & input, const Tensor & weight, bool bias_defined, Tensor & out0, Tensor & out1); // {"schema": "aten::mkldnn_linear_backward_weights.out(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> mkldnn_linear_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::mkldnn_linear_backward.out(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> matmul_backward_out(const Tensor & grad, const Tensor & self, const Tensor & other, ::std::array<bool,2> mask, Tensor & out0, Tensor & out1); // {"schema": "aten::matmul_backward.out(Tensor grad, Tensor self, Tensor other, bool[2] mask, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _aminmax_out(const Tensor & self, Tensor & out0, Tensor & out1); // {"schema": "aten::_aminmax.out(Tensor self, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _aminmax_out(const Tensor & self, int64_t dim, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::_aminmax.dim_out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+Tensor & max_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::max_pool2d_backward.out(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_max_pool2d_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool2d_backward.out(Tensor grad_output, Tensor output, Tensor input, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_max_pool3d_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::mkldnn_max_pool3d_backward.out(Tensor grad_output, Tensor output, Tensor input, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & quantized_max_pool1d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool1d.out(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & quantized_max_pool2d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & quantized_max_pool3d_out(const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, Tensor & out); // {"schema": "aten::quantized_max_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & median_out(const Tensor & self, Tensor & out); // {"schema": "aten::median.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & nanmedian_out(const Tensor & self, Tensor & out); // {"schema": "aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _mps_convolution_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::_mps_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> mps_convolution_backward_out(const Tensor & self, const Tensor & grad_output, const Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::mps_convolution_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+Tensor & mkldnn_convolution_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::mkldnn_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> mkldnn_rnn_layer_out(const Tensor & input, const Tensor & weight0, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & hx_, const Tensor & cx_, bool reverse, IntArrayRef batch_sizes, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::mkldnn_rnn_layer.out(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &,Tensor &,Tensor &,Tensor &> mkldnn_rnn_layer_backward_out(const Tensor & input, const Tensor & weight1, const Tensor & weight2, const Tensor & weight3, const Tensor & weight4, const Tensor & hx_, const Tensor & cx_tmp, const Tensor & output, const Tensor & hy_, const Tensor & cy_, const c10::optional<Tensor> & grad_output, const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, IntArrayRef batch_sizes, bool batch_first, const Tensor & workspace, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4, Tensor & out5, Tensor & out6); // {"schema": "aten::mkldnn_rnn_layer_backward.out(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4, Tensor(f!) out5, Tensor(g!) out6) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> miopen_batch_norm_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, bool training, double exponential_average_factor, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::miopen_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> miopen_batch_norm_backward_out(const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, const c10::optional<Tensor> & save_mean, const c10::optional<Tensor> & save_var, double epsilon, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+Tensor & miopen_convolution_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & miopen_convolution_transpose_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_convolution_transpose.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & miopen_depthwise_convolution_out(const Tensor & self, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, Tensor & out); // {"schema": "aten::miopen_depthwise_convolution.out(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &,Tensor &> miopen_rnn_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & hx, const c10::optional<Tensor> & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::miopen_rnn.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"}
+void miopen_rnn_backward_out(const Tensor & input, TensorList weight, int64_t weight_stride0, const Tensor & weight_buf, const Tensor & hx, const c10::optional<Tensor> & cx, const Tensor & output, const c10::optional<Tensor> & grad_output, const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, const c10::optional<Tensor> & dropout_state, const Tensor & reserve, ::std::array<bool,4> output_mask, Tensor & out0, Tensor & out1, Tensor & out2, TensorList out3); // {"schema": "aten::miopen_rnn_backward.out(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!)[] out3) -> ()", "dispatch": "True", "default": "True"}
+Tensor & _sparse_sparse_matmul_out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::_sparse_sparse_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & mul_out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::mul.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,Tensor> _native_batch_norm_legit_functional(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & running_mean, const Tensor & running_var, bool training, double momentum, double eps); // {"schema": "aten::_native_batch_norm_legit_functional(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor running_mean_out, Tensor running_var_out)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _native_batch_norm_legit_no_training_out(const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & bias, const Tensor & running_mean, const Tensor & running_var, double momentum, double eps, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_native_batch_norm_legit_no_training.out(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
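+// [Editorial sketch -- not emitted by the codegen; added for illustration.]
+// "_functional" entries such as _native_batch_norm_legit_functional trade
+// in-place state updates for extra return values: rather than mutating
+// running_mean / running_var, the op returns running_mean_out /
+// running_var_out as fresh tensors (note the 5-tuple in its schema). This is
+// the form that functionalization passes expect when rewriting mutating graphs.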
+::std::tuple<Tensor &,Tensor &> batch_norm_stats_out(const Tensor & input, double eps, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_stats.out(Tensor input, float eps, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> batch_norm_gather_stats_out(const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum, double eps, int64_t count, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_gather_stats.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> batch_norm_gather_stats_with_counts_out(const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum, double eps, const Tensor & counts, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_gather_stats_with_counts.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> native_batch_norm_backward_out(const Tensor & grad_out, const Tensor & input, const c10::optional<Tensor> & weight, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, const c10::optional<Tensor> & save_mean, const c10::optional<Tensor> & save_invstd, bool train, double eps, ::std::array<bool,3> output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::native_batch_norm_backward.out(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &> batch_norm_backward_reduce_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, bool input_g, bool weight_g, bool bias_g, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3); // {"schema": "aten::batch_norm_backward_reduce.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))", "dispatch": "True", "default": "True"}
+Tensor & batch_norm_backward_elemt_out(const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, const Tensor & sum_dy, const Tensor & sum_dy_xmu, const Tensor & count, Tensor & out); // {"schema": "aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> batch_norm_update_stats_out(const Tensor & input, const c10::optional<Tensor> & running_mean, const c10::optional<Tensor> & running_var, double momentum, Tensor & out0, Tensor & out1); // {"schema": "aten::batch_norm_update_stats.out(Tensor input, Tensor? running_mean, Tensor? running_var, float momentum, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+Tensor & _nnpack_spatial_convolution_out(const Tensor & input, const Tensor & weight, const c10::optional<Tensor> & bias, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::_nnpack_spatial_convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & ones_out(IntArrayRef size, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::ones.names_out(int[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & ones_like_out(const Tensor & self, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::ones_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _euclidean_dist_out(const Tensor & x1, const Tensor & x2, Tensor & out); // {"schema": "aten::_euclidean_dist.out(Tensor x1, Tensor x2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _cdist_forward_out(const Tensor & x1, const Tensor & x2, double p, c10::optional<int64_t> compute_mode, Tensor & out); // {"schema": "aten::_cdist_forward.out(Tensor x1, Tensor x2, float p, int? compute_mode, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _cdist_backward_out(const Tensor & grad, const Tensor & x1, const Tensor & x2, double p, const Tensor & cdist, Tensor & out); // {"schema": "aten::_cdist_backward.out(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _pdist_forward_out(const Tensor & self, double p, Tensor & out); // {"schema": "aten::_pdist_forward.out(Tensor self, float p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _pdist_backward_out(const Tensor & grad, const Tensor & self, double p, const Tensor & pdist, Tensor & out); // {"schema": "aten::_pdist_backward.out(Tensor grad, Tensor self, float p, Tensor pdist, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & pixel_shuffle_out(const Tensor & self, int64_t upscale_factor, Tensor & out); // {"schema": "aten::pixel_shuffle.out(Tensor self, int upscale_factor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & pixel_unshuffle_out(const Tensor & self, int64_t downscale_factor, Tensor & out); // {"schema": "aten::pixel_unshuffle.out(Tensor self, int downscale_factor, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & channel_shuffle_out(const Tensor & self, c10::SymInt groups, Tensor & out); // {"schema": "aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _pin_memory_out(const Tensor & self, c10::optional<Device> device, Tensor & out); // {"schema": "aten::_pin_memory.out(Tensor self, Device? device=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & scalar_tensor_out(const Scalar & s, Tensor & out); // {"schema": "aten::scalar_tensor.out(Scalar s, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & rand_out(c10::SymIntArrayRef size, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::rand.names_out(SymInt[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & rand_out(c10::SymIntArrayRef size, c10::optional<Generator> generator, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::rand.generator_with_names_out(SymInt[] size, *, Generator? generator, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & rand_like_out(const Tensor & self, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::rand_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & randint_like_out(const Tensor & self, c10::SymInt high, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::randint_like.out(Tensor self, SymInt high, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & randint_like_out(const Tensor & self, c10::SymInt low, c10::SymInt high, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::randint_like.low_dtype_out(Tensor self, SymInt low, SymInt high, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & randn_out(c10::SymIntArrayRef size, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::randn.names_out(SymInt[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & randn_out(c10::SymIntArrayRef size, c10::optional<Generator> generator, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::randn.generator_with_names_out(SymInt[] size, *, Generator? generator, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & randn_like_out(const Tensor & self, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::randn_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & repeat_out(const Tensor & self, c10::SymIntArrayRef repeats, Tensor & out); // {"schema": "aten::repeat.out(Tensor self, SymInt[] repeats, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & repeat_interleave_out(const Tensor & repeats, c10::optional<c10::SymInt> output_size, Tensor & out); // {"schema": "aten::repeat_interleave.Tensor_out(Tensor repeats, *, SymInt? output_size=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _mkldnn_reshape_out(const Tensor & self, IntArrayRef shape, Tensor & out); // {"schema": "aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & relu_out(const Tensor & self, Tensor & out); // {"schema": "aten::relu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & select_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_backward.out(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & celu_out(const Tensor & self, const Scalar & alpha, Tensor & out); // {"schema": "aten::celu.out(Tensor self, Scalar alpha=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & slice_backward_out(const Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_backward.out(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & slice_scatter_out(const Tensor & self, const Tensor & src, int64_t dim, c10::optional<c10::SymInt> start, c10::optional<c10::SymInt> end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_scatter.out(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & select_scatter_out(const Tensor & self, const Tensor & src, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_scatter.out(Tensor self, Tensor src, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & diagonal_scatter_out(const Tensor & self, const Tensor & src, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_scatter.out(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & as_strided_scatter_out(const Tensor & self, const Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset, Tensor & out); // {"schema": "aten::as_strided_scatter.out(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+void unsafe_split_out(const Tensor & self, c10::SymInt split_size, int64_t dim, TensorList out); // {"schema": "aten::unsafe_split.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void unsafe_split_with_sizes_out(const Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim, TensorList out); // {"schema": "aten::unsafe_split_with_sizes.out(Tensor self, SymInt[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+Tensor & sum_out(const Tensor & self, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::sum.out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> std_mean_out(const Tensor & self, OptionalIntArrayRef dim, const c10::optional<Scalar> & correction, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::std_mean.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+Tensor & prod_out(const Tensor & self, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::prod.out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _mkldnn_transpose_out(const Tensor & self, int64_t dim0, int64_t dim1, Tensor & out); // {"schema": "aten::_mkldnn_transpose.out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & flip_out(const Tensor & self, IntArrayRef dims, Tensor & out); // {"schema": "aten::flip.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & roll_out(const Tensor & self, c10::SymIntArrayRef shifts, IntArrayRef dims, Tensor & out); // {"schema": "aten::roll.out(Tensor self, SymInt[1] shifts, int[1] dims=[], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & rot90_out(const Tensor & self, int64_t k, IntArrayRef dims, Tensor & out); // {"schema": "aten::rot90.out(Tensor self, int k=1, int[] dims=[0,1], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _transform_bias_rescale_qkv_out(const Tensor & qkv, const Tensor & qkv_bias, int64_t num_heads, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_transform_bias_rescale_qkv.out(Tensor qkv, Tensor qkv_bias, int num_heads, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+Tensor & _nested_tensor_from_mask_out(const Tensor & t, const Tensor & mask, bool mask_check, Tensor & out); // {"schema": "aten::_nested_tensor_from_mask.out(Tensor t, Tensor mask, bool mask_check=True, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_from_padded_out(const Tensor & padded, const Tensor & cpu_nested_shape_example, bool fuse_transform_0213, Tensor & out); // {"schema": "aten::_nested_from_padded.out(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_tensor_size_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_size.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_tensor_strides_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_strides.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_tensor_storage_offsets_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_tensor_storage_offsets.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_from_padded_and_nested_example_out(const Tensor & padded, const Tensor & nt_example, Tensor & out); // {"schema": "aten::_nested_from_padded_and_nested_example.out(Tensor padded, Tensor nt_example, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_view_from_buffer_copy_out(const Tensor & self, const Tensor & nested_size, const Tensor & nested_strides, const Tensor & offsets, Tensor & out); // {"schema": "aten::_nested_view_from_buffer_copy.out(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_view_from_jagged_copy_out(const Tensor & self, const Tensor & offsets, const Tensor & dummy, const c10::optional<Tensor> & lengths, int64_t ragged_idx, Tensor & out); // {"schema": "aten::_nested_view_from_jagged_copy.out(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _nested_get_values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_nested_get_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _trilinear_out(const Tensor & i1, const Tensor & i2, const Tensor & i3, IntArrayRef expand1, IntArrayRef expand2, IntArrayRef expand3, IntArrayRef sumdim, int64_t unroll_dim, Tensor & out); // {"schema": "aten::_trilinear.out(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _unique_out(const Tensor & self, bool sorted, bool return_inverse, Tensor & out0, Tensor & out1); // {"schema": "aten::_unique.out(Tensor self, bool sorted=True, bool return_inverse=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> unique_dim_out(const Tensor & self, int64_t dim, bool sorted, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_dim.out(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> unique_consecutive_out(const Tensor & self, bool return_inverse, bool return_counts, c10::optional<int64_t> dim, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_consecutive.out(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> unique_dim_consecutive_out(const Tensor & self, int64_t dim, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::unique_dim_consecutive.out(Tensor self, int dim, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _unique2_out(const Tensor & self, bool sorted, bool return_inverse, bool return_counts, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+Tensor & _unsafe_view_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> var_mean_out(const Tensor & self, OptionalIntArrayRef dim, const c10::optional<Scalar> & correction, bool keepdim, Tensor & out0, Tensor & out1); // {"schema": "aten::var_mean.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _weight_norm_interface_out(const Tensor & v, const Tensor & g, int64_t dim, Tensor & out0, Tensor & out1); // {"schema": "aten::_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _weight_norm_interface_backward_out(const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim, Tensor & out0, Tensor & out1); // {"schema": "aten::_weight_norm_interface_backward.out(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+Tensor & zeros_out(IntArrayRef size, c10::optional<DimnameList> names, Tensor & out); // {"schema": "aten::zeros.names_out(int[] size, *, Dimname[]? names, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _efficientzerotensor_out(c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::_efficientzerotensor.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & zeros_like_out(const Tensor & self, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::zeros_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _standard_gamma_grad_out(const Tensor & self, const Tensor & output, Tensor & out); // {"schema": "aten::_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _standard_gamma_out(const Tensor & self, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::_standard_gamma.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _dirichlet_grad_out(const Tensor & x, const Tensor & alpha, const Tensor & total, Tensor & out); // {"schema": "aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _sample_dirichlet_out(const Tensor & self, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::_sample_dirichlet.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & poisson_out(const Tensor & self, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::poisson.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & binomial_out(const Tensor & count, const Tensor & prob, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::binomial.out(Tensor count, Tensor prob, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & native_norm_out(const Tensor & self, const Scalar & p, Tensor & out); // {"schema": "aten::native_norm.out(Tensor self, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & native_norm_out(const Tensor & self, const c10::optional<Scalar> & p, IntArrayRef dim, bool keepdim, c10::optional<ScalarType> dtype, Tensor & out); // {"schema": "aten::native_norm.ScalarOpt_dim_dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _sparse_sum_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::_sparse_sum.dim_out(Tensor self, int[1] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
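+// [Editorial sketch -- not emitted by the codegen; summarizes the mapping
+// visible in the surrounding declarations.]
+// Schema types lower to C++ parameter/return types as, for example:
+//   Tensor?                  -> const c10::optional<Tensor> &
+//   ScalarType? / Generator? -> c10::optional<ScalarType> / c10::optional<Generator>
+//   bool[3]                  -> ::std::array<bool,3>
+//   (Tensor(a!), Tensor(b!)) -> ::std::tuple<Tensor &,Tensor &>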
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_sum_backward_out(const Tensor & grad, const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::_sparse_sum_backward.out(Tensor grad, Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_csr_sum_out(const Tensor & self, IntArrayRef dim, bool keepdim, c10::optional dtype, Tensor & out); // {"schema": "aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_csr_prod_out(const Tensor & self, IntArrayRef dim, bool keepdim, c10::optional dtype, Tensor & out); // {"schema": "aten::_sparse_csr_prod.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_sparse_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self, Tensor & out); // {"schema": "aten::_sparse_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_log_softmax_out(const Tensor & self, int64_t dim, bool half_to_float, Tensor & out); // {"schema": "aten::_sparse_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_log_softmax_backward_data_out(const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self, Tensor & out); // {"schema": "aten::_sparse_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _spdiags_out(const Tensor & diagonals, const Tensor & offsets, IntArrayRef shape, c10::optional layout, Tensor & out); // {"schema": "aten::_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const c10::optional & p, ScalarType dtype, Tensor & out); // {"schema": "aten::norm.ScalarOpt_dtype_out(Tensor self, Scalar? p, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & norm_out(const Tensor & self, const Scalar & p, Tensor & out); // {"schema": "aten::norm.Scalar_out(Tensor self, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & clone_out(const Tensor & self, c10::optional memory_format, Tensor & out); // {"schema": "aten::clone.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & resize_as_out(const Tensor & self, const Tensor & the_template, c10::optional memory_format, const Tensor & out); // {"schema": "aten::resize_as.out(Tensor self, Tensor the_template, *, MemoryFormat? memory_format=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize_as(const Tensor & self, const Tensor & the_template, c10::optional memory_format); // {"schema": "aten::resize_as(Tensor self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & resize_as_sparse_out(const Tensor & self, const Tensor & the_template, const Tensor & out); // {"schema": "aten::resize_as_sparse.out(Tensor self, Tensor the_template, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor resize_as_sparse(const Tensor & self, const Tensor & the_template); // {"schema": "aten::resize_as_sparse(Tensor self, Tensor the_template) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & zero_out(const Tensor & self, Tensor & out); // {"schema": "aten::zero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor zero(const Tensor & self); // {"schema": "aten::zero(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sub_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::sub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsub_out(const Tensor & self, const Tensor & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::rsub.Tensor_out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rsub_out(const Tensor & self, const Scalar & other, const Scalar & alpha, Tensor & out); // {"schema": "aten::rsub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_addmm_out(const Tensor & self, const Tensor & mat1, const Tensor & mat2, const Scalar & beta, const Scalar & alpha, Tensor & out); // {"schema": "aten::_sparse_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & sparse_coo_tensor_out(IntArrayRef size, Tensor & out); // {"schema": "aten::sparse_coo_tensor.size_out(int[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_coo_tensor_with_dims_out(int64_t sparse_dim, int64_t dense_dim, IntArrayRef size, Tensor & out); // {"schema": "aten::_sparse_coo_tensor_with_dims.out(int sparse_dim, int dense_dim, int[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const Tensor & indices, const Tensor & values, c10::optional is_coalesced, Tensor & out); // {"schema": "aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +const Tensor & sparse_resize_out(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim, const Tensor & out); // {"schema": "aten::sparse_resize.out(Tensor self, int[] size, int sparse_dim, int dense_dim, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sparse_resize(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize(Tensor self, int[] size, int sparse_dim, int dense_dim) -> Tensor", "dispatch": "True", "default": "True"} +const Tensor & sparse_resize_and_clear_out(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim, const Tensor & out); // {"schema": "aten::sparse_resize_and_clear.out(Tensor self, int[] size, int sparse_dim, int dense_dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor sparse_resize_and_clear(const Tensor & self, IntArrayRef size, int64_t sparse_dim, int64_t dense_dim); // {"schema": "aten::sparse_resize_and_clear(Tensor self, int[] size, int sparse_dim, int dense_dim) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & sparse_mask_out(const Tensor & self, const Tensor & mask, Tensor & out); // {"schema": "aten::sparse_mask.out(Tensor self, Tensor mask, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_mask_projection_out(const Tensor & self, const Tensor & mask, bool accumulate_matches, Tensor & out); // {"schema": "aten::_sparse_mask_projection.out(Tensor self, Tensor mask, bool accumulate_matches=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_dense_out(const Tensor & self, c10::optional dtype, c10::optional masked_grad, Tensor & out); // {"schema": "aten::_to_dense.out(Tensor self, ScalarType? dtype=None, bool? masked_grad=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _coalesce_out(const Tensor & self, Tensor & out); // {"schema": "aten::_coalesce.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _coalesced_out(const Tensor & self, bool coalesced, Tensor & out); // {"schema": "aten::_coalesced.out(Tensor self, bool coalesced, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor _coalesced(const Tensor & self, bool coalesced); // {"schema": "aten::_coalesced(Tensor self, bool coalesced) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & copy_sparse_to_sparse_out(const Tensor & self, const Tensor & src, bool non_blocking, Tensor & out); // {"schema": "aten::copy_sparse_to_sparse.out(Tensor self, Tensor src, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor copy_sparse_to_sparse(const Tensor & self, const Tensor & src, bool non_blocking); // {"schema": "aten::copy_sparse_to_sparse(Tensor self, Tensor src, bool non_blocking=False) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_out(const Tensor & self, int64_t sparse_dim, Tensor & out); // {"schema": "aten::_to_sparse.sparse_dim_out(Tensor self, int sparse_dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_out(const Tensor & self, c10::optional layout, OptionalIntArrayRef blocksize, c10::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse.out(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_csr_out(const Tensor & self, c10::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_csc_out(const Tensor & self, c10::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_bsr_out(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_bsr.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _to_sparse_bsc_out(const Tensor & self, IntArrayRef blocksize, c10::optional dense_dim, Tensor & out); // {"schema": "aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & to_mkldnn_out(const Tensor & self, c10::optional dtype, Tensor & out); // {"schema": "aten::to_mkldnn.out(Tensor self, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_reorder_conv2d_weight_out(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, OptionalSymIntArrayRef input_size, Tensor & out); // {"schema": "aten::mkldnn_reorder_conv2d_weight.out(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_reorder_conv3d_weight_out(const Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, Tensor & out); // {"schema": "aten::mkldnn_reorder_conv3d_weight.out(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_dynamic_out(const Tensor & self, ScalarType dtype, bool reduce_range, Tensor & out); // {"schema": "aten::quantize_per_tensor_dynamic.out(Tensor self, ScalarType dtype, bool reduce_range, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_out(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_tensor.out(Tensor self, float scale, int zero_point, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & quantize_per_tensor_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_tensor.tensor_qparams_out(Tensor self, Tensor scale, Tensor zero_point, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void quantize_per_tensor_out(TensorList tensors, const Tensor & scales, const Tensor & zero_points, ScalarType dtype, TensorList out); // {"schema": "aten::quantize_per_tensor.tensors_out(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & quantize_per_channel_out(const Tensor & self, const Tensor & scales, const Tensor & zero_points, int64_t axis, ScalarType dtype, Tensor & out); // {"schema": "aten::quantize_per_channel.out(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & dequantize_out(const Tensor & self, Tensor & out); // {"schema": "aten::dequantize.self_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void dequantize_out(TensorList tensors, TensorList out); // {"schema": "aten::dequantize.tensors_out(Tensor[] tensors, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +Tensor & q_per_channel_scales_out(const Tensor & self, Tensor & out); // {"schema": "aten::q_per_channel_scales.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & q_per_channel_zero_points_out(const Tensor & self, Tensor & out); // {"schema": "aten::q_per_channel_zero_points.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & int_repr_out(const Tensor & self, Tensor & out); // {"schema": "aten::int_repr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_per_tensor_quantized_tensor_out(const Tensor & self, double scale, int64_t zero_point, Tensor & out); // {"schema": "aten::_make_per_tensor_quantized_tensor.out(Tensor self, float scale, int zero_point, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_per_channel_quantized_tensor_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, Tensor & out); // {"schema": "aten::_make_per_channel_quantized_tensor.out(Tensor self, Tensor scale, Tensor zero_point, int axis, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple fake_quantize_per_tensor_affine_cachemask_out(const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::fake_quantize_per_tensor_affine_cachemask.out(Tensor self, float scale, int zero_point, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +::std::tuple _fake_quantize_per_tensor_affine_cachemask_tensor_qparams_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, const Tensor & fake_quant_enabled, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"} +Tensor & _fake_quantize_learnable_per_tensor_affine_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor, Tensor & out); // {"schema": "aten::_fake_quantize_learnable_per_tensor_affine.out(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple fake_quantize_per_channel_affine_cachemask_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, Tensor & out0, Tensor & out1); // {"schema": "aten::fake_quantize_per_channel_affine_cachemask.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) 
+Tensor & _fake_quantize_learnable_per_channel_affine_out(const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, Tensor & out); // {"schema": "aten::_fake_quantize_learnable_per_channel_affine.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _fused_moving_avg_obs_fq_helper_out(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, Tensor & running_min, Tensor & running_max, Tensor & scale, Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant, Tensor & out0, Tensor & out1); // {"schema": "aten::_fused_moving_avg_obs_fq_helper.out(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False, *, Tensor(e!) out0, Tensor(f!) out1) -> (Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor,Tensor,Tensor,Tensor,Tensor,Tensor> _fused_moving_avg_obs_fq_helper_functional(const Tensor & self, const Tensor & observer_on, const Tensor & fake_quant_on, const Tensor & running_min, const Tensor & running_max, const Tensor & scale, const Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); // {"schema": "aten::_fused_moving_avg_obs_fq_helper_functional(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor running_min, Tensor running_max, Tensor scale, Tensor zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> (Tensor output, Tensor mask, Tensor running_min_out, Tensor running_max_out, Tensor scale_out, Tensor zero_point_out)", "dispatch": "True", "default": "True"}
+Tensor & _to_copy_out(const Tensor & self, bool non_blocking, c10::optional<MemoryFormat> memory_format, Tensor & out); // {"schema": "aten::_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &,Tensor &,Tensor &> _lstm_mps_out(const Tensor & input, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4, Tensor & out5); // {"schema": "aten::_lstm_mps.out(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4, Tensor(f!) out5) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!), Tensor(f!))", "dispatch": "True", "default": "True"}
+void lstm_mps_backward_out(const c10::optional<Tensor> & grad_y, const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, const Tensor & z_state, const Tensor & cell_state_fwd, const Tensor & input, const Tensor & layersOutputs, TensorList hx, TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first, Tensor & out0, TensorList out1, TensorList out2); // {"schema": "aten::lstm_mps_backward.out(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, Tensor(a!) out0, Tensor(b!)[] out1, Tensor(c!)[] out2) -> ()", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _thnn_fused_lstm_cell_out(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & cx, const c10::optional<Tensor> & input_bias, const c10::optional<Tensor> & hidden_bias, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &> _thnn_fused_lstm_cell_backward_impl_out(const c10::optional<Tensor> & grad_hy, const c10::optional<Tensor> & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_thnn_fused_lstm_cell_backward_impl.out(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _thnn_fused_gru_cell_out(const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const c10::optional<Tensor> & input_bias, const c10::optional<Tensor> & hidden_bias, Tensor & out0, Tensor & out1); // {"schema": "aten::_thnn_fused_gru_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &,Tensor &,Tensor &,Tensor &> _thnn_fused_gru_cell_backward_out(const Tensor & grad_hy, const Tensor & workspace, bool has_bias, Tensor & out0, Tensor & out1, Tensor & out2, Tensor & out3, Tensor & out4); // {"schema": "aten::_thnn_fused_gru_cell_backward.out(Tensor grad_hy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))", "dispatch": "True", "default": "True"}
+::std::tuple<Tensor &,Tensor &> _pack_padded_sequence_out(const Tensor & input, const Tensor & lengths, bool batch_first, Tensor & out0, Tensor & out1); // {"schema": "aten::_pack_padded_sequence.out(Tensor input, Tensor lengths, bool batch_first, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))", "dispatch": "True", "default": "True"}
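+// Reading these declarations: each generated signature is paired with a JSON
+// trailer recording the dispatcher schema it was derived from. As a sketch of
+// the mapping (using aten::_pack_padded_sequence.out above): mutable schema
+// outputs such as `Tensor(a!) out0, Tensor(b!) out1` become trailing
+// `Tensor & out0, Tensor & out1` parameters with a
+// `::std::tuple<Tensor &,Tensor &>` return, while optional `Tensor?`
+// arguments surface as `const c10::optional<Tensor> &`.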
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, Storage source); // {"schema": "aten::set.source_Storage(Tensor self, Storage source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::set.source_Storage_storage_offset_out(Tensor self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[], *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); // {"schema": "aten::set.source_Storage_storage_offset(Tensor self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, const Tensor & source, Tensor & out); // {"schema": "aten::set.source_Tensor_out(Tensor self, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self, const Tensor & source); // {"schema": "aten::set.source_Tensor(Tensor self, Tensor source) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & set_out(const Tensor & self, Tensor & out); // {"schema": "aten::set.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor set(const Tensor & self); // {"schema": "aten::set(Tensor self) -> Tensor", "dispatch": "True", "default": "True"} +Tensor & lift_out(const Tensor & self, Tensor & out); // {"schema": "aten::lift.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & lift_fresh_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::lift_fresh_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_fill_out(const Tensor & self, const Tensor & mask, const Scalar & value, Tensor & out); // {"schema": "aten::masked_fill.Scalar_out(Tensor self, Tensor mask, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_fill_out(const Tensor & self, const Tensor & mask, const Tensor & value, Tensor & out); // {"schema": "aten::masked_fill.Tensor_out(Tensor self, Tensor mask, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & masked_scatter_out(const Tensor & self, const Tensor & mask, const Tensor & source, Tensor & out); // {"schema": "aten::masked_scatter.out(Tensor self, Tensor mask, Tensor source, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _masked_softmax_out(const Tensor & self, const Tensor & mask, c10::optional dim, c10::optional mask_type, Tensor & out); // {"schema": "aten::_masked_softmax.out(Tensor self, Tensor mask, int? dim=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _masked_softmax_backward_out(const Tensor & grad_output, const Tensor & output, const Tensor & mask, c10::optional dim, Tensor & out); // {"schema": "aten::_masked_softmax_backward.out(Tensor grad_output, Tensor output, Tensor mask, int? dim=None, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & put_out(const Tensor & self, const Tensor & index, const Tensor & source, bool accumulate, Tensor & out); // {"schema": "aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & index_fill_out(const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, Tensor & out); // {"schema": "aten::index_fill.int_Scalar_out(Tensor self, int dim, Tensor index, Scalar value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & index_fill_out(const Tensor & self, int64_t dim, const Tensor & index, const Tensor & value, Tensor & out); // {"schema": "aten::index_fill.int_Tensor_out(Tensor self, int dim, Tensor index, Tensor value, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_and_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_and.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_or_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_or.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_xor_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_xor.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __lshift___out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::__lshift__.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __lshift___out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::__lshift__.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_left_shift_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_left_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __rshift___out(const Tensor & self, const Scalar & other, Tensor & out); // {"schema": "aten::__rshift__.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & __rshift___out(const Tensor & self, const Tensor & other, Tensor & out); // {"schema": "aten::__rshift__.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & bitwise_right_shift_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::bitwise_right_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & random_out(const Tensor & self, int64_t from, c10::optional to, c10::optional generator, Tensor & out); // {"schema": "aten::random.from_out(Tensor self, int from, int? to, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor random(const Tensor & self, int64_t from, c10::optional to, c10::optional generator); // {"schema": "aten::random.from(Tensor self, int from, int? to, *, Generator? 
+Tensor & random_out(const Tensor & self, int64_t to, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::random.to_out(Tensor self, int to, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor random(const Tensor & self, int64_t to, c10::optional<Generator> generator); // {"schema": "aten::random.to(Tensor self, int to, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & random_out(const Tensor & self, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::random.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor random(const Tensor & self, c10::optional<Generator> generator); // {"schema": "aten::random(Tensor self, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & uniform_out(const Tensor & self, double from, double to, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::uniform.out(Tensor self, float from=0, float to=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor uniform(const Tensor & self, double from, double to, c10::optional<Generator> generator); // {"schema": "aten::uniform(Tensor self, float from=0, float to=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & cauchy_out(const Tensor & self, double median, double sigma, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::cauchy.out(Tensor self, float median=0, float sigma=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor cauchy(const Tensor & self, double median, double sigma, c10::optional<Generator> generator); // {"schema": "aten::cauchy(Tensor self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & log_normal_out(const Tensor & self, double mean, double std, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::log_normal.out(Tensor self, float mean=1, float std=2, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor log_normal(const Tensor & self, double mean, double std, c10::optional<Generator> generator); // {"schema": "aten::log_normal(Tensor self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & exponential_out(const Tensor & self, double lambd, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::exponential.out(Tensor self, float lambd=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor exponential(const Tensor & self, double lambd, c10::optional<Generator> generator); // {"schema": "aten::exponential(Tensor self, float lambd=1, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & geometric_out(const Tensor & self, double p, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::geometric.out(Tensor self, float p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor geometric(const Tensor & self, double p, c10::optional<Generator> generator); // {"schema": "aten::geometric(Tensor self, float p, *, Generator? generator=None) -> Tensor", "dispatch": "True", "default": "True"}
+Tensor & tril_indices_out(int64_t row, int64_t col, int64_t offset, Tensor & out); // {"schema": "aten::tril_indices.out(int row, int col, int offset=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & triu_indices_out(int64_t row, int64_t col, int64_t offset, Tensor & out); // {"schema": "aten::triu_indices.out(int row, int col, int offset=0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & trace_out(const Tensor & self, Tensor & out); // {"schema": "aten::trace.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _cholesky_solve_helper_out(const Tensor & self, const Tensor & A, bool upper, Tensor & out); // {"schema": "aten::_cholesky_solve_helper.out(Tensor self, Tensor A, bool upper, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & dist_out(const Tensor & self, const Tensor & other, const Scalar & p, Tensor & out); // {"schema": "aten::dist.out(Tensor self, Tensor other, Scalar p=2, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+void _histogramdd_bin_edges_out(const Tensor & self, IntArrayRef bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density, TensorList out); // {"schema": "aten::_histogramdd_bin_edges.out(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+Tensor & _histogramdd_from_bin_cts_out(const Tensor & self, IntArrayRef bins, c10::optional<ArrayRef<double>> range, const c10::optional<Tensor> & weight, bool density, Tensor & out); // {"schema": "aten::_histogramdd_from_bin_cts.out(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _histogramdd_from_bin_tensors_out(const Tensor & self, TensorList bins, const c10::optional<Tensor> & weight, bool density, Tensor & out); // {"schema": "aten::_histogramdd_from_bin_tensors.out(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & remainder_out(const Scalar & self, const Tensor & other, Tensor & out); // {"schema": "aten::remainder.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & argsort_out(const Tensor & self, bool stable, int64_t dim, bool descending, Tensor & out); // {"schema": "aten::argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & unfold_backward_out(const Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, Tensor & out); // {"schema": "aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & normal_out(const Tensor & self, double mean, double std, c10::optional<Generator> generator, Tensor & out); // {"schema": "aten::normal.out(Tensor self, float mean=0, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
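+// The random-op pattern above: each mutating RNG method (Tensor::uniform_,
+// Tensor::exponential_, ...) also gets a functional variant plus an out
+// variant so functionalization passes can rewrite in-place RNG calls. A
+// hedged usage sketch, assuming the functional entry points are reachable as
+// declared here:
+//
+//   at::Tensor t = at::empty({4});
+//   t.uniform_(0.0, 1.0);                               // in-place, mutates t
+//   at::Tensor u = uniform(t, 0.0, 1.0, c10::nullopt);  // functional form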
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +void _amp_foreach_non_finite_check_and_unscale_out(TensorList self, Tensor & found_inf, const Tensor & inv_scale, TensorList out); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale.out(Tensor[] self, Tensor(b!) found_inf, Tensor inv_scale, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,Tensor> _amp_foreach_non_finite_check_and_unscale(TensorList self, const Tensor & found_inf, const Tensor & inv_scale); // {"schema": "aten::_amp_foreach_non_finite_check_and_unscale(Tensor[] self, Tensor found_inf, Tensor inv_scale) -> (Tensor[] self_out, Tensor found_inf_out)", "dispatch": "True", "default": "True"} +Tensor & _amp_update_scale_out(const Tensor & self, Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor & out); // {"schema": "aten::_amp_update_scale.out(Tensor self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _amp_update_scale(const Tensor & self, const Tensor & growth_tracker, const Tensor & found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); // {"schema": "aten::_amp_update_scale(Tensor self, Tensor growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> (Tensor, Tensor growth_tracker_out)", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_add.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, TensorList other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_add.List_out(Tensor[] self, Tensor[] other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_add.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_add_out(TensorList self, const Tensor & other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_add.Tensor_out(Tensor[] self, Tensor other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_sub.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, TensorList other, const Scalar & alpha, TensorList out); // {"schema": "aten::_foreach_sub.List_out(Tensor[] self, Tensor[] other, *, Scalar alpha=1, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sub_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_sub.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_mul.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_mul_out(TensorList self, 
+void _foreach_mul_out(TensorList self, ArrayRef<Scalar> scalars, TensorList out); // {"schema": "aten::_foreach_mul.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_mul_out(TensorList self, const Tensor & other, TensorList out); // {"schema": "aten::_foreach_mul.Tensor_out(Tensor[] self, Tensor other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_div_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_div.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_div_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_div.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_div_out(TensorList self, ArrayRef<Scalar> scalars, TensorList out); // {"schema": "aten::_foreach_div.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_div_out(TensorList self, const Tensor & other, TensorList out); // {"schema": "aten::_foreach_div.Tensor_out(Tensor[] self, Tensor other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_max_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_clamp_max.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_max_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_clamp_max.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_max_out(TensorList self, ArrayRef<Scalar> scalars, TensorList out); // {"schema": "aten::_foreach_clamp_max.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_min_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_clamp_min.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_min_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_clamp_min.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_clamp_min_out(TensorList self, ArrayRef<Scalar> scalars, TensorList out); // {"schema": "aten::_foreach_clamp_min.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_maximum_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_maximum.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_maximum_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_maximum.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+void _foreach_maximum_out(TensorList self, ArrayRef<Scalar> scalars, TensorList out); // {"schema": "aten::_foreach_maximum.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
-> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, const Scalar & scalar, TensorList out); // {"schema": "aten::_foreach_minimum.Scalar_out(Tensor[] self, Scalar scalar, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, TensorList other, TensorList out); // {"schema": "aten::_foreach_minimum.List_out(Tensor[] self, Tensor[] other, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_minimum_out(TensorList self, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_minimum.ScalarList_out(Tensor[] self, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value, TensorList out); // {"schema": "aten::_foreach_addcdiv.Scalar_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_addcdiv.ScalarList_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcdiv_out(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars, TensorList out); // {"schema": "aten::_foreach_addcdiv.Tensor_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, const Scalar & value, TensorList out); // {"schema": "aten::_foreach_addcmul.Scalar_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, ArrayRef scalars, TensorList out); // {"schema": "aten::_foreach_addcmul.ScalarList_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_addcmul_out(TensorList self, TensorList tensor1, TensorList tensor2, const Tensor & scalars, TensorList out); // {"schema": "aten::_foreach_addcmul.Tensor_out(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_abs_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_abs.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_acos_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_acos.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_asin_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_asin.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_atan_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_atan.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_ceil_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_ceil.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_cos_out(TensorList self, TensorList out); // {"schema": 
"aten::_foreach_cos.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_cosh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_cosh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_erf_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_erf.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_erfc_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_erfc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_exp_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_exp.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_expm1_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_expm1.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_floor_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_floor.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_frac_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_frac.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lerp_out(TensorList self, TensorList tensors1, TensorList weights, TensorList out); // {"schema": "aten::_foreach_lerp.List_out(Tensor[] self, Tensor[] tensors1, Tensor[] weights, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lerp_out(TensorList self, TensorList tensors1, const Scalar & weight, TensorList out); // {"schema": "aten::_foreach_lerp.Scalar_out(Tensor[] self, Tensor[] tensors1, Scalar weight, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_lgamma_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_lgamma.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log10_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log10.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log1p_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log1p.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_log2_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_log2.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_neg_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_neg.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_norm_out(TensorList self, const Scalar & ord, TensorList out); // {"schema": "aten::_foreach_norm.Scalar_out(Tensor[] self, Scalar ord=2, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, TensorList exponent, TensorList out); // {"schema": "aten::_foreach_pow.List_out(Tensor[] self, Tensor[] exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, const Scalar & exponent, TensorList out); // {"schema": 
"aten::_foreach_pow.Scalar_out(Tensor[] self, Scalar exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_pow_out(TensorList self, ArrayRef exponent, TensorList out); // {"schema": "aten::_foreach_pow.ScalarList_out(Tensor[] self, Scalar[] exponent, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_reciprocal_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_reciprocal.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_round_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_round.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sigmoid_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sigmoid.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sign_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sign.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sin_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sin.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sinh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sinh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_sqrt_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_sqrt.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_tan_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_tan.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_tanh_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_tanh.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_trunc_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_trunc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +void _foreach_zero_out(TensorList self, TensorList out); // {"schema": "aten::_foreach_zero.out(Tensor[] self, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_zero(TensorList self); // {"schema": "aten::_foreach_zero(Tensor[] self) -> Tensor[] self_out", "dispatch": "True", "default": "True"} +void _foreach_copy_out(TensorList self, TensorList src, bool non_blocking, TensorList out); // {"schema": "aten::_foreach_copy.out(Tensor[] self, Tensor[] src, bool non_blocking=False, *, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::vector _foreach_copy(TensorList self, TensorList src, bool non_blocking); // {"schema": "aten::_foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out", "dispatch": "True", "default": "True"} +Tensor & bucketize_out(const Scalar & self, const Tensor & boundaries, bool out_int32, bool right, Tensor & out); // {"schema": "aten::bucketize.Scalar_out(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_jvp_out(const Tensor & glu, const Tensor & x, const Tensor & dx, int64_t dim, Tensor & out); // {"schema": "aten::glu_jvp.out(Tensor glu, Tensor x, Tensor dx, int dim, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & glu_backward_jvp_out(const Tensor & grad_x, const Tensor & grad_glu, const Tensor & x, const Tensor & dgrad_glu, const Tensor & dx, int64_t dim, Tensor & out); // {"schema": "aten::glu_backward_jvp.out(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & hardswish_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::hardswish_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & rrelu_with_noise_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, bool self_is_result, Tensor & out); // {"schema": "aten::rrelu_with_noise_backward.out(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & mkldnn_adaptive_avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::mkldnn_adaptive_avg_pool2d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool2d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::_adaptive_avg_pool2d.out(Tensor self, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool2d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::_adaptive_avg_pool2d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool3d_out(const Tensor & self, c10::SymIntArrayRef output_size, Tensor & out); // {"schema": "aten::_adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _adaptive_avg_pool3d_backward_out(const Tensor & grad_output, const Tensor & self, Tensor & out); // {"schema": "aten::_adaptive_avg_pool3d_backward.out(Tensor grad_output, Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _slow_conv2d_backward_out(const Tensor & grad_output, const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array output_mask, Tensor & out0, Tensor & out1, Tensor & out2); // {"schema": "aten::_slow_conv2d_backward.output_mask_out(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))", "dispatch": "True", "default": "True"} +Tensor & conv_depthwise3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::conv_depthwise3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slow_conv_dilated2d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_dilated2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slow_conv_dilated3d_out(const Tensor & self, const Tensor & weight, c10::SymIntArrayRef kernel_size, const c10::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, Tensor & out); // {"schema": "aten::slow_conv_dilated3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & isinf_out(const Tensor & self, Tensor & out); // {"schema": "aten::isinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & linalg_matrix_exp_out(const Tensor & self, Tensor & out); // {"schema": "aten::linalg_matrix_exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_intlist_out(const Tensor & values, OptionalIntArrayRef addends, Tensor & out); // {"schema": "aten::_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_filled_intlist_out(const Tensor & values, OptionalIntArrayRef addends, Tensor & out); // {"schema": "aten::_test_optional_filled_intlist.out(Tensor values, int[2]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_optional_floatlist_out(const Tensor & values, c10::optional> addends, Tensor & out); // {"schema": "aten::_test_optional_floatlist.out(Tensor values, float[]? addends, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_warn_in_autograd_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_warn_in_autograd.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_autograd_multiple_dispatch_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_autograd_multiple_dispatch.fullcoverage_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _test_autograd_multiple_dispatch_view_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_test_autograd_multiple_dispatch_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & segment_reduce_out(const Tensor & data, c10::string_view reduce, const c10::optional & lengths, const c10::optional & indices, const c10::optional & offsets, int64_t axis, bool unsafe, const c10::optional & initial, Tensor & out); // {"schema": "aten::segment_reduce.out(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, Tensor? offsets=None, int axis=0, bool unsafe=False, Scalar? initial=None, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _segment_reduce_backward_out(const Tensor & grad, const Tensor & output, const Tensor & data, c10::string_view reduce, const c10::optional & lengths, const c10::optional & offsets, int64_t axis, const c10::optional & initial, Tensor & out); // {"schema": "aten::_segment_reduce_backward.out(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _nested_tensor_from_tensor_list_out(TensorList list, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, Tensor & out); // {"schema": "aten::_nested_tensor_from_tensor_list.out(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _fw_primal_copy_out(const Tensor & self, int64_t level, Tensor & out); // {"schema": "aten::_fw_primal_copy.out(Tensor self, int level, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _make_dual_copy_out(const Tensor & primal, const Tensor & tangent, int64_t level, Tensor & out); // {"schema": "aten::_make_dual_copy.out(Tensor primal, Tensor tangent, int level, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_as_real_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::view_as_real_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_as_complex_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::view_as_complex_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _conj_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_conj_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _neg_view_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_neg_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & as_strided_copy_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional storage_offset, Tensor & out); // {"schema": "aten::as_strided_copy.out(Tensor self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _sparse_broadcast_to_copy_out(const Tensor & self, IntArrayRef size, Tensor & out); // {"schema": "aten::_sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & diagonal_copy_out(const Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, Tensor & out); // {"schema": "aten::diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & expand_copy_out(const Tensor & self, c10::SymIntArrayRef size, bool implicit, Tensor & out); // {"schema": "aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & permute_copy_out(const Tensor & self, IntArrayRef dims, Tensor & out); // {"schema": "aten::permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _reshape_alias_copy_out(const Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, Tensor & out); // {"schema": "aten::_reshape_alias_copy.out(Tensor self, SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & select_copy_out(const Tensor & self, int64_t dim, c10::SymInt index, Tensor & out); // {"schema": "aten::select_copy.int_out(Tensor self, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & detach_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::detach_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & slice_copy_out(const Tensor & self, int64_t dim, c10::optional start, c10::optional end, c10::SymInt step, Tensor & out); // {"schema": "aten::slice_copy.Tensor_out(Tensor self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::squeeze_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::squeeze_copy.dim_out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & squeeze_copy_out(const Tensor & self, IntArrayRef dim, Tensor & out); // {"schema": "aten::squeeze_copy.dims_out(Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & t_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::t_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & transpose_copy_out(const Tensor & self, int64_t dim0, int64_t dim1, Tensor & out); // {"schema": "aten::transpose_copy.int_out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & unsqueeze_copy_out(const Tensor & self, int64_t dim, Tensor & out); // {"schema": "aten::unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & values_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & crow_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::crow_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & col_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::col_indices_copy.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & ccol_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::ccol_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & row_indices_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::row_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_copy_out(const Tensor & self, c10::SymIntArrayRef size, Tensor & out); // {"schema": "aten::view_copy.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & view_copy_out(const Tensor & self, ScalarType dtype, Tensor & out); // {"schema": "aten::view_copy.dtype_out(Tensor self, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & unfold_copy_out(const Tensor & self, int64_t dimension, int64_t size, int64_t step, Tensor & out); // {"schema": "aten::unfold_copy.out(Tensor self, int dimension, int size, int step, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & alias_copy_out(const Tensor & self, Tensor & out); // {"schema": "aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & to_padded_tensor_out(const Tensor & self, double padding, OptionalSymIntArrayRef output_size, Tensor & out); // {"schema": "aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +Tensor & _transformer_encoder_layer_fwd_out(const Tensor & src, int64_t embed_dim, int64_t num_heads, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const Tensor & norm_weight_1, const Tensor & norm_bias_1, const Tensor & norm_weight_2, const Tensor & norm_bias_2, const Tensor & ffn_weight_1, const Tensor & ffn_bias_1, const Tensor & ffn_weight_2, const Tensor & ffn_bias_2, const c10::optional & mask, c10::optional mask_type, Tensor & out); // {"schema": "aten::_transformer_encoder_layer_fwd.out(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"} +::std::tuple _native_multi_head_attention_out(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const c10::optional & mask, bool need_weights, bool average_attn_weights, c10::optional mask_type, Tensor & out0, Tensor & out1); // {"schema": "aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) 
+Tensor & _triton_scaled_dot_attention_out(const Tensor & q, const Tensor & k, const Tensor & v, double dropout_p, Tensor & out); // {"schema": "aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _triton_multi_head_attention_out(const Tensor & query, const Tensor & key, const Tensor & value, int64_t embed_dim, int64_t num_head, const Tensor & qkv_weight, const Tensor & qkv_bias, const Tensor & proj_weight, const Tensor & proj_bias, const c10::optional<Tensor> & mask, Tensor & out); // {"schema": "aten::_triton_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+Tensor & _foobar_out(const Tensor & self, bool arg1, bool arg2, bool arg3, Tensor & out); // {"schema": "aten::_foobar.out(Tensor self, bool arg1=True, bool arg2=True, *, bool arg3=True, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+void _fused_adam_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<Tensor> & grad_scale, const c10::optional<Tensor> & found_inf, TensorList out); // {"schema": "aten::_fused_adam.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
+::std::tuple<::std::vector<Tensor>,::std::vector<Tensor>,::std::vector<Tensor>,::std::vector<Tensor>,::std::vector<Tensor>> _fused_adam(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<Tensor> & grad_scale, const c10::optional<Tensor> & found_inf); // {"schema": "aten::_fused_adam(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"}
+void _fused_adam_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<Tensor> & grad_scale, const c10::optional<Tensor> & found_inf, TensorList out); // {"schema": "aten::_fused_adam.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"}
found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adam(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adam.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_adamw_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adamw.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adamw(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adamw(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_adamw_out(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf, TensorList out); // {"schema": "aten::_fused_adamw.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? 
found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector,::std::vector,::std::vector> _fused_adamw(TensorList self, TensorList grads, TensorList exp_avgs, TensorList exp_avg_sqs, TensorList max_exp_avg_sqs, TensorList state_steps, const Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_adamw.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)", "dispatch": "True", "default": "True"} +void _fused_sgd_out(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional & grad_scale, const c10::optional & found_inf, TensorList out); // {"schema": "aten::_fused_sgd.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_sgd(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)", "dispatch": "True", "default": "True"} +void _fused_sgd_out(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, const Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional & grad_scale, const c10::optional & found_inf, TensorList out); // {"schema": "aten::_fused_sgd.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()", "dispatch": "True", "default": "True"} +::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(TensorList self, TensorList grads, TensorList momentum_buffer_list, double weight_decay, double momentum, const Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional & grad_scale, const c10::optional & found_inf); // {"schema": "aten::_fused_sgd.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? 
found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)", "dispatch": "True", "default": "True"} diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h new file mode 100644 index 0000000000000000000000000000000000000000..943ac161d4c18278d245b58257045ee990570de5 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#endif + +namespace at::detail { +// When filling a number to 1-element CPU tensor, we want to skip +// everything but manipulate data ptr directly. +// Ideally this fast pass should be implemented in TensorIterator, +// but we also want to skip compute_types which in not avoidable +// in TensorIterator for now. +Tensor& scalar_fill(Tensor& self, const Scalar& value); +TORCH_API Tensor scalar_tensor_static( + const Scalar& s, + c10::optional dtype_opt, + c10::optional device_opt); +} // namespace at::detail + +// This is in the c10 namespace because we use ADL to find the functions in it. +namespace c10 { + +// FIXME: this should be (and was) Scalar::toTensor, but there is currently no +// way to implement this without going through Derived Types (which are not part +// of core). +inline at::Tensor scalar_to_tensor( + const Scalar& s, + const Device device = at::kCPU) { + // This is the fast track we have for CPU scalar tensors. + if (device == at::kCPU) { + return at::detail::scalar_tensor_static(s, s.type(), at::kCPU); + } + return at::scalar_tensor(s, at::device(device).dtype(s.type())); +} + +} // namespace c10 + +namespace at::native { + +inline Tensor wrapped_scalar_tensor( + const Scalar& scalar, + const Device device = at::kCPU) { + auto tensor = scalar_to_tensor(scalar, device); + tensor.unsafeGetTensorImpl()->set_wrapped_number(true); + return tensor; +} + +} // namespace at::native diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h new file mode 100644 index 0000000000000000000000000000000000000000..5d6285281f23ec9adf7d916d40d743283980e053 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h @@ -0,0 +1,2 @@ +#pragma once +#include diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..142665b7c8b27b64be1ca7b3b44aaca132141c77 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace at { + +// if dim_post_expr is 0 and wrap_scalar is true, then dim must be in the +// range [-1, 0]. This is a special case for scalar tensors and manifests in +// e.g. torch.sum(scalar_tensor, 0) Otherwise, dim should be in the range +// [-dim_post_expr, dim_post_expr-1]. 
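+// A minimal illustration of the rule above (assumed values, not from this
+// header): for a rank-4 tensor, dim_post_expr == 4, so
+//   maybe_wrap_dim(-1, 4) == 3   // negative dims count from the end
+//   maybe_wrap_dim( 2, 4) == 2   // in-range dims pass through unchanged
+//   maybe_wrap_dim(-5, 4)        // outside [-4, 3] -> raises IndexError
+//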
+using c10::maybe_wrap_dim;
+
+inline int64_t maybe_wrap_dim(int64_t dim, TensorImpl* tensor) {
+  return maybe_wrap_dim(dim, tensor->dim());
+}
+
+inline int64_t maybe_wrap_dim(int64_t dim, TensorList tensors) {
+  if (tensors.empty()) {
+    // can't wrap empty TensorList; rely on underlying implementation to throw
+    // error if necessary.
+    return dim;
+  }
+  return maybe_wrap_dim(dim, tensors[0].dim());
+}
+
+inline int64_t maybe_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<int64_t>>& tensor_sizes) {
+  if (tensor_sizes.empty()) {
+    // can't wrap empty list; rely on underlying implementation to throw error
+    // if necessary
+    return dim;
+  }
+  return maybe_wrap_dim(dim, tensor_sizes[0].size());
+}
+
+// Given an array of dimensions `dims` of length `ndims`, this function "wraps"
+// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
+// specified using negative indices.
+//
+// Additionally, if `wrap_scalars` is true, then scalar tensors with rank 0
+// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised
+// for dimensions not in the range [-dim_post_expr, dim_post_expr).
+inline void maybe_wrap_dims_n(
+    int64_t* dims,
+    int64_t ndims,
+    int64_t dim_post_expr,
+    bool wrap_scalars = true) {
+  if (dim_post_expr <= 0) {
+    if (wrap_scalars) {
+      dim_post_expr = 1; // this will make range [-1, 0]
+    } else {
+      TORCH_CHECK_INDEX(
+          ndims == 0,
+          "Dimension specified as ",
+          dims[0],
+          " but tensor has no dimensions");
+      return;
+    }
+  }
+  int64_t min = -dim_post_expr;
+  int64_t max = dim_post_expr - 1;
+  for (const auto i : c10::irange(ndims)) {
+    auto& dim = dims[i];
+    if (dim < min || dim > max) {
+      TORCH_CHECK_INDEX(
+          false,
+          "Dimension out of range (expected to be in range of [",
+          min,
+          ", ",
+          max,
+          "], but got ",
+          dim,
+          ")");
+    }
+    if (dim < 0)
+      dim += dim_post_expr;
+  }
+}
+
+// Given a contiguous container of dimensions `dims`, this function "wraps"
+// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
+// specified using negative indices.
+//
+// Additionally, if `wrap_scalars` is true, then scalar tensors with rank 0
+// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised
+// for dimensions not in the range [-dim_post_expr, dim_post_expr).
+template <typename Container>
+inline void maybe_wrap_dims(
+    Container& dims,
+    int64_t dim_post_expr,
+    bool wrap_scalars = true) {
+  return maybe_wrap_dims_n(
+      dims.data(), dims.size(), dim_post_expr, wrap_scalars);
+}
+
+// previously, size [0] tensors were the only possible empty tensors; thus, it
+// wasn't possible to cat empty tensors unless all the other tensors were
+// 1-dimensional, so we allowed these tensors to be "skipped" (both for wrap
+// dimension behavior and dimension size checking). We maintain this behavior
+// for backwards compatibility, but only for this specific size (i.e. other
+// empty sizes are not skipped).
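+//
+// An illustrative sketch (hypothetical sizes, not taken from this header):
+// with tensor_sizes = {{0}, {2, 3}}, the size-[0] entry is skipped, so
+// dim = -1 is wrapped against the 2-d entry and _legacy_cat_wrap_dim
+// returns 1.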
+template <typename T>
+inline int64_t _legacy_cat_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<T>>& tensor_sizes) {
+  for (auto& sizes : tensor_sizes) {
+    if (sizes.size() == 1 && sizes[0] == 0) {
+      continue;
+    }
+    return maybe_wrap_dim(dim, sizes.size());
+  }
+  return dim;
+}
+
+inline int64_t legacy_cat_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<int64_t>>& tensor_sizes) {
+  return _legacy_cat_wrap_dim(dim, tensor_sizes);
+}
+
+inline int64_t legacy_cat_wrap_dim_symint(
+    int64_t dim,
+    const std::vector<std::vector<c10::SymInt>>& tensor_sizes) {
+  return _legacy_cat_wrap_dim(dim, tensor_sizes);
+}
+
+inline int64_t legacy_cat_wrap_dim(
+    int64_t dim,
+    const MaterializedITensorListRef& tensors) {
+  for (const Tensor& tensor : tensors) {
+    if (tensor.dim() == 1 && tensor.sizes()[0] == 0) {
+      continue;
+    }
+    return maybe_wrap_dim(dim, tensor.dim());
+  }
+  return dim;
+}
+
+// wrap negative dims in a vector
+inline void wrap_all_dims(
+    std::vector<int64_t>& dims_to_wrap,
+    int64_t tensor_total_dims) {
+  for (const auto i : c10::irange(dims_to_wrap.size())) {
+    dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims);
+  }
+}
+
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h
new file mode 100644
index 0000000000000000000000000000000000000000..393e322e6fe66c8f6d11db9c968800ed5134b61c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h
@@ -0,0 +1,243 @@
+#pragma once
+
+#include <c10/util/irange.h>
+
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace at::jit {
+
+// A template environment is a mapping from template variable names, e.g.,
+// identifier (corresponding to $identifier) to their expansions.
+//
+// This template environment supports storing strings, numbers and lists
+// of strings, and can be chained together (so that lookup proceeds in
+// the top level environment, and then recurses into a parent
+// environment if the key is not found.)
+struct TemplateEnv {
+  TemplateEnv() = default;
+  TemplateEnv(TemplateEnv& parent) : parent(&parent) {}
+
+  using string_list = std::vector<std::string>;
+
+  // Add a string 'v' to the map at key 'k'.
+  void s(const std::string& k, const std::string& v) {
+    strings_[k] = v;
+    lists_.erase(k);
+  }
+
+  // Add a number 'v' to the map at key 'k'
+  template <typename T>
+  void d(const std::string& k, const T& v) {
+    strings_[k] = c10::to_string(v);
+    lists_.erase(k);
+  }
+
+  // Retrieve the string representation of the value stored at 'k' from the
+  // map. Raises an exception if the key is not found.
+  const std::string& s(const std::string& k) const {
+    if (strings_.count(k) == 0) {
+      if (parent) {
+        return parent->s(k);
+      }
+      notFound(k);
+    }
+    return strings_.at(k);
+  }
+
+  // Store a list of strings 'v' in the map at 'k'.
+  void v(const std::string& k, const string_list& v) {
+    lists_[k] = v;
+    strings_.erase(k);
+  }
+
+  // Retrieve a list of strings stored at 'k' from the map.
+  // Raises an exception if the key is not found.
+  const string_list& v(const std::string& k) const {
+    if (lists_.count(k) == 0) {
+      if (parent) {
+        return parent->v(k);
+      }
+      notFound(k);
+    }
+    return lists_.at(k);
+  }
+
+  // Test if a string 'k' is a string (as opposed to a list.)
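+  // (Illustrative sketch, in terms of the setters above: after s("x", "1"),
+  // keyIsString("x") is true; after v("xs", {"a", "b"}), keyIsString("xs")
+  // is false; an unset key falls through to the parent, or throws.)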
+  bool keyIsString(const std::string& k) const {
+    if (strings_.count(k) > 0)
+      return true;
+    if (lists_.count(k) > 0)
+      return false;
+    if (parent)
+      return parent->keyIsString(k);
+    notFound(k);
+  }
+
+ private:
+  [[noreturn]] void notFound(const std::string& k) const {
+    std::stringstream ss;
+    ss << "key not found: " << k;
+    throw std::logic_error(ss.str());
+  }
+
+  std::unordered_map<std::string, std::string> strings_;
+  std::unordered_map<std::string, string_list> lists_;
+  TemplateEnv* parent{nullptr};
+};
+
+/*
+# Match $identifier or ${identifier} and replace with the value in env.
+# If this identifier is at the beginning of whitespace on a line
+# and its value is a list then it is treated as
+# block substitution by indenting all lines of all elements.
+# If the identifier is on a line starting with non-whitespace and a list
+# then it is comma separated. ${,foo} will insert a comma before the list
+# if this list is not empty and ${foo,} will insert one after.
+*/
+struct CodeTemplate {
+  /* implicit */ CodeTemplate(std::string t) : template_text(std::move(t)) {}
+
+  std::string format(const TemplateEnv& env) const {
+    std::stringstream out;
+    size_t pos = 0;
+    size_t indent = 0;
+    bool all_whitespace = true;
+    while (pos < template_text.size()) {
+      char c = template_text[pos];
+      if (c == '$') {
+        std::stringstream kss;
+        // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
+        bool comma_before;
+        // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
+        bool comma_after;
+        size_t new_pos = parseKey(pos, kss, comma_before, comma_after);
+        std::string k = kss.str();
+        bool is_string = env.keyIsString(k);
+        if (all_whitespace) {
+          if (is_string)
+            emitStringWithIndents(out, indent, env.s(k));
+          else
+            emitLinesIndented(out, indent, env.v(k));
+        } else {
+          if (is_string)
+            out << env.s(k);
+          else
+            emitCommaSeparatedList(out, env.v(k), comma_before, comma_after);
+        }
+        all_whitespace = false;
+        pos = new_pos;
+      } else {
+        out << c;
+        if (!isspace(c))
+          all_whitespace = false;
+        indent++;
+        if (c == '\n') {
+          indent = 0;
+          all_whitespace = true;
+        }
+        pos++;
+      }
+    }
+    return out.str();
+  }
+
+ private:
+  using string_list = std::vector<std::string>;
+  char charAt(size_t p) const {
+    if (p >= template_text.size())
+      throw std::logic_error("EOS found in key");
+    return template_text[p];
+  }
+  size_t parseKey(
+      size_t pos,
+      std::ostream& k,
+      bool& comma_before,
+      bool& comma_after) const {
+    comma_before = false;
+    comma_after = false;
+    pos++;
+    if (charAt(pos) == '{') {
+      pos++;
+      if (charAt(pos) == ',') {
+        comma_before = true;
+        pos++;
+      }
+      pos = parseIdent(pos, k);
+      if (charAt(pos) == ',') {
+        comma_after = true;
+        pos++;
+      }
+      if (charAt(pos) != '}')
+        throw std::logic_error("missing terminating '}'");
+      pos++;
+      return pos;
+    } else {
+      return parseIdent(pos, k);
+    }
+  }
+  size_t parseIdent(size_t pos, std::ostream& k) const {
+    while (pos < template_text.size() &&
+           (isalnum(template_text[pos]) || template_text[pos] == '_')) {
+      k << template_text[pos];
+      pos++;
+    }
+    return pos;
+  }
+  void emitCommaSeparatedList(
+      std::ostream& out,
+      const string_list& strings,
+      bool comma_before,
+      bool comma_after) const {
+    if (comma_before && !strings.empty())
+      out << ", ";
+    for (const auto i : c10::irange(strings.size())) {
+      if (i > 0)
+        out << ", ";
+      out << strings[i];
+    }
+    if (comma_after && !strings.empty())
+      out << ", ";
+  }
+  // These indentation functions follow the convention that they never emit
+  // leading or trailing newlines when the input string does not have leading
+  // or trailing newlines. It's the responsibility of the calling function
+  // to indent correctly in the context.
+  void emitIndent(std::ostream& out, size_t indent) const {
+    for (C10_UNUSED const auto i : c10::irange(indent)) {
+      out << " ";
+    }
+  }
+  void emitStringWithIndents(
+      std::ostream& out,
+      size_t indent,
+      const std::string& str) const {
+    for (auto c : str) {
+      out << c;
+      if (c == '\n') {
+        emitIndent(out, indent);
+      }
+    }
+  }
+  void emitLinesIndented(
+      std::stringstream& out,
+      size_t indent,
+      const string_list& strings) const {
+    for (const auto i : c10::irange(strings.size())) {
+      if (i > 0)
+        emitIndent(out, indent);
+      emitStringWithIndents(out, indent, strings[i]);
+      if (i + 1 != strings.size())
+        out << "\n";
+    }
+  }
+  std::string template_text;
+};
+
+static inline std::string format(const std::string& fmt, TemplateEnv& env) {
+  return CodeTemplate(fmt).format(env);
+}
+
+} // namespace at::jit
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..1164daf58526c797f3b16f9646130512c1107659
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
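+//
+// Illustrative usage sketch (`self` and `out` are hypothetical tensors, not
+// part of this generated header): the `_out` variant takes `out` first, the
+// `_outf` variant takes it last, and both write into `out`:
+//   at::Tensor out = at::empty({1, 3, 7, 7});
+//   at::compositeexplicitautograd::_adaptive_avg_pool2d_out(out, self, {7, 7});
+//   at::compositeexplicitautograd::_adaptive_avg_pool2d_outf(self, {7, 7}, out);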
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _adaptive_avg_pool2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API at::Tensor & _adaptive_avg_pool2d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out); +TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size); +TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1dda8a7f900b507be662d7e3c4f40cc6227d10ba --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _cast_Short(const at::Tensor & self, bool non_blocking=false); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..25d577c9e9c8f1ddc4d729e935c3923dad363ca4 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
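+//
+// Illustrative sketch (hypothetical tensor `t`): the operator struct below
+// exposes typed entry points that mirror its schema string, e.g.
+//   int64_t r = at::_ops::_debug_has_internal_overlap::call(t);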
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _debug_has_internal_overlap { + using schema = int64_t (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_debug_has_internal_overlap") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_debug_has_internal_overlap(Tensor self) -> int") + static int64_t call(const at::Tensor & self); + static int64_t redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..98424bedb1da9767cef26fc0e6514ac8f7c98070 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _fake_quantize_learnable_per_tensor_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor=1.0); + +} // namespace cuda +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b678ec3b329ae312e77441ff085d4bb8cbd83e1d --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _flash_attention_backward(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, c10::optional scale=c10::nullopt); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h new file mode 100644 index 
0000000000000000000000000000000000000000..09c6cc795f7594dd36006b24012700ca691fedf9 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h @@ -0,0 +1,25 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _foreach_acos_out(at::TensorList self, at::TensorList out); +TORCH_API ::std::vector foreach_tensor_acos_slow(at::TensorList self); +TORCH_API void foreach_tensor_acos_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_acos_cuda(at::TensorList self); +TORCH_API void foreach_tensor_acos_cuda_(at::TensorList self); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..87421e4550654a218e1226a9ea47c75abba07643 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__linalg_slogdet : public at::impl::MetaBase { + + + void meta(const at::Tensor & A); +}; + +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2829f9c3f0b5ec74fce1c41fa4c924d0a1318709 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
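+//
+// Illustrative sketch (hypothetical matrix `A`): the CPU-dispatched reduced
+// SVD declared below can be used as
+//   auto [U, S, Vh] = at::cpu::_linalg_svd(A, /*full_matrices=*/false);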
+#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _linalg_svd(const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional driver=c10::nullopt); +TORCH_API ::std::tuple _linalg_svd_out(at::Tensor & U, at::Tensor & S, at::Tensor & Vh, const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional driver=c10::nullopt); +TORCH_API ::std::tuple _linalg_svd_outf(const at::Tensor & A, bool full_matrices, bool compute_uv, c10::optional driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh); + +} // namespace cpu +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h new file mode 100644 index 0000000000000000000000000000000000000000..2beb0efa43fd34b9aaef8f8b75e0fc5b5bf76a89 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_logcumsumexp(Tensor self, int dim) -> Tensor +inline at::Tensor _logcumsumexp(const at::Tensor & self, int64_t dim) { + return at::_ops::_logcumsumexp::call(self, dim); +} + +// aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _logcumsumexp_out(at::Tensor & out, const at::Tensor & self, int64_t dim) { + return at::_ops::_logcumsumexp_out::call(self, dim, out); +} +// aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _logcumsumexp_outf(const at::Tensor & self, int64_t dim, at::Tensor & out) { + return at::_ops::_logcumsumexp_out::call(self, dim, out); +} + +} diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h new file mode 100644 index 0000000000000000000000000000000000000000..65719a2a7c1fa521000381ae8865d8c045b9012e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? 
mask_type=None) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) {
+    return at::_ops::_native_multi_head_attention::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type);
+}
+
+// aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> _native_multi_head_attention_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) {
+    return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1);
+}
+// aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!)
out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _native_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional & mask, bool need_weights, bool average_attn_weights, c10::optional mask_type, at::Tensor & out0, at::Tensor & out1) { + return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1); +} + +} diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h new file mode 100644 index 0000000000000000000000000000000000000000..306bc172714467940349b1571a585903f5513f99 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _nested_tensor_storage_offsets_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor _nested_tensor_storage_offsets(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ecddb0aa954bcdc11fed80bfd89b7b149c86605e --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor log_softmax_backward_sparse_cpu(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor log_softmax_backward_sparse_cuda(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1a31dba65058e15b5b79fa9294f40ad83401de96 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_sparse_dim_out(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor & _to_sparse_out(const at::Tensor & self, c10::optional layout, at::OptionalIntArrayRef blocksize, c10::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, c10::optional layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional dense_dim=c10::nullopt); +TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, c10::optional layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional dense_dim=c10::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, c10::optional layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional dense_dim=c10::nullopt); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..cb821be2a9371eaa56b4d45531ad1925667f1e42 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_nearest_exact2d_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, c10::optional scales_h, c10::optional scales_w); +}; + +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..34f28baa7dc79f1d66c7c9d7a8a0afd2bd0f8271 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h @@ -0,0 +1,30 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> () +inline void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size) { + return at::_ops::_validate_sparse_csr_tensor_args::call(crow_indices, col_indices, values, size); +} + +} diff --git 
a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2013ec008cbd6b265d71ebbe096c64cc84a415a1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API align_as { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::align_as") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "align_as(Tensor self, Tensor other) -> Tensor") + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..74438d94d66c2f5675528ccb2bec006caec8100c --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_aminmax : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, c10::optional dim, bool keepdim); +}; + +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..cb65fcf47b89f6cf6be6c1127cc0c5e3adc8c9fd --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h @@ -0,0 +1,50 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
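+//
+// Illustrative sketch (hypothetical tensor `t`): each operator struct below
+// carries its schema string plus two typed entry points,
+//   at::Tensor r = at::_ops::ceil::call(t);   // full dispatch
+// while redispatch(ks, t) re-enters the dispatcher with an explicit
+// DispatchKeySet `ks`, as used by kernels that need to skip dispatch keys.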
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API ceil { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil(Tensor self) -> Tensor") + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API ceil_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil_") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil_(Tensor(a!) self) -> Tensor(a!)") + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API ceil_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3aa45802c9e6dad3c7611df2d44375fcd4f37a31 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
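+//
+// Illustrative sketch (hypothetical tensor `x`): the declarations below pad
+// the last dimension of `x` by one element on each side with zeros,
+//   at::Tensor y = at::compositeexplicitautograd::constant_pad_nd(x, {1, 1});
+// and the *_symint variants accept symbolic (SymInt) pad sizes instead.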
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0); +TORCH_API at::Tensor constant_pad_nd_symint(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0); +TORCH_API at::Tensor & constant_pad_nd_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0); +TORCH_API at::Tensor & constant_pad_nd_outf(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value, at::Tensor & out); +TORCH_API at::Tensor & constant_pad_nd_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0); +TORCH_API at::Tensor & constant_pad_nd_symint_outf(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5b1afbaf3fad2ce3d9412807d2725e44663d2471 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor dot(const at::Tensor & self, const at::Tensor & tensor); + +} // namespace cuda +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h new file mode 100644 index 0000000000000000000000000000000000000000..6469d6c39681b72e484a34d3e0a2211d9d6f7783 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h @@ -0,0 +1,91 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor +inline at::Tensor fft_hfft(const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm); +} +namespace symint { + template ::value>> + at::Tensor fft_hfft(const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm); + } +} + +// aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? 
norm=None) -> Tensor +inline at::Tensor fft_hfft_symint(const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft::call(self, n, dim, norm); +} +namespace symint { + template ::value>> + at::Tensor fft_hfft(const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft::call(self, n, dim, norm); + } +} + +// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out); +} +namespace symint { + template ::value>> + at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out); + } +} + +// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional n, int64_t dim, c10::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out); +} +namespace symint { + template ::value>> + at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional n, int64_t dim, c10::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out); + } +} + +// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fft_hfft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft_out::call(self, n, dim, norm, out); +} +namespace symint { + template ::value>> + at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional n=c10::nullopt, int64_t dim=-1, c10::optional norm=c10::nullopt) { + return at::_ops::fft_hfft_out::call(self, n, dim, norm, out); + } +} + +// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & fft_hfft_symint_outf(const at::Tensor & self, c10::optional n, int64_t dim, c10::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft_out::call(self, n, dim, norm, out); +} +namespace symint { + template ::value>> + at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional n, int64_t dim, c10::optional norm, at::Tensor & out) { + return at::_ops::fft_hfft_out::call(self, n, dim, norm, out); + } +} + +} diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5b4dc96b6c1a7f4697b74304f3e4579aa7c2a9c4 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_irfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional norm=c10::nullopt); +TORCH_API at::Tensor & fft_irfft2_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, c10::optional norm, at::Tensor & out); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bb600b7a9e499342d9ddc80aee5394f53aba20fd --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_rfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional norm=c10::nullopt); +TORCH_API at::Tensor & fft_rfftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional norm, at::Tensor & out); +} // namespace native +} // namespace at diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h new file mode 100644 index 0000000000000000000000000000000000000000..6b236f2a38c7803f91cb8f78acb0b221af515d8b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h @@ -0,0 +1,44 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fix(Tensor self) -> Tensor +inline at::Tensor fix(const at::Tensor & self) { + return at::_ops::fix::call(self); +} + +// aten::fix_(Tensor(a!) self) -> Tensor(a!) 
+// aten::fix_(Tensor(a!) self) -> Tensor(a!)
+inline at::Tensor & fix_(at::Tensor & self) {
+    return at::_ops::fix_::call(self);
+}
+
+// aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fix_out(at::Tensor & out, const at::Tensor & self) {
+    return at::_ops::fix_out::call(self, out);
+}
+// aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fix_outf(const at::Tensor & self, at::Tensor & out) {
+    return at::_ops::fix_out::call(self, out);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..637e920813d050358e6ab607ee9c6b0021eb8528
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,33 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out);
+TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor float_power(const at::Scalar & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Scalar & self, const at::Tensor & exponent, at::Tensor & out);
+TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Scalar & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Scalar & exponent, at::Tensor & out);
+TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Scalar & exponent);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..5615ab89f3eaf2e83c745e344afeb74c8bd4ffd4
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor & geometric_(at::Tensor & self, double p, c10::optional<at::Generator> generator=c10::nullopt);
+
+} // namespace cpu
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..100902cef992de25d43b9c97379b417ca54bdb2b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API glu_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
+};
+
+struct TORCH_API glu_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..513a65fc134272481eadd034829b86a80c7db825
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/hardshrink_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured_hardshrink_out : public at::meta::structured_hardshrink {
+void impl(const at::Tensor & self, const at::Scalar & lambd, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..46aae89384310e93a51ad04c0626b6c782ad745c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
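+// A minimal usage sketch for the CPU-dispatch wrappers declared below,
+// assuming contiguous float CPU tensors (all values here are hypothetical):
+//
+//   at::Tensor self = at::randn({4});
+//   at::Tensor grad_output = at::ones_like(self);
+//   at::Tensor grad_input =
+//       at::cpu::hardtanh_backward(grad_output, self, /*min_val=*/-1.0, /*max_val=*/1.0);
+//
+// Unlike at::hardtanh_backward, the at::cpu:: entry point skips dispatch and
+// calls the CPU kernel directly, so it must only see CPU tensors.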
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor hardtanh_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
+TORCH_API at::Tensor & hardtanh_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
+TORCH_API at::Tensor & hardtanh_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e8ad855dda3d589b9df3ec6d86da5d65220b07c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor isposinf(const at::Tensor & self);
+TORCH_API at::Tensor & isposinf_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & isposinf_outf(const at::Tensor & self, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..99b0f6716cedbd169834f261cafd4382d460d38b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
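+// The compositeexplicitautogradnonfunctional wrappers below back the
+// functionalization pass; ordinary code reaches the same op through the
+// public API, e.g. (a hypothetical sketch):
+//
+//   at::Tensor x = at::randn({2, 3});
+//   at::Tensor mask = at::le(x, 0.5);  // boolean tensor, same shape as x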
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor le(const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & le_(at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor le(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & le_(at::Tensor & self, const at::Tensor & other);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3dcbd8ee55d501448ed804d5925eaf1ad56f5732
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> linalg_qr(const at::Tensor & A, c10::string_view mode="reduced");
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ca2d33e400c0457662c39fd7923d402abec4997
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> linalg_svd(const at::Tensor & A, bool full_matrices=true, c10::optional<c10::string_view> driver=c10::nullopt);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> linalg_svd_out(const at::Tensor & A, bool full_matrices, c10::optional<c10::string_view> driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4d895e76770670a078c71ffeb0f9f7a915372b6
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured_linalg_vector_norm : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype);
+};
+
+} // namespace meta
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f73d98a9858f70f8517701303eac9776b0749857
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor logaddexp2(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & logaddexp2_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & logaddexp2_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h
new file mode 100644
index 0000000000000000000000000000000000000000..f72c60ce80c7b62e642d149d95220078b1bd9d30
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/max_pool1d_with_indices_ops.h>
+
+namespace at {
+
+
+// aten::max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> max_pool1d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) {
+    return at::_ops::max_pool1d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..c652ea8a4776691a1287fa6eed1afa532e752dd5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API max_pool2d_with_indices_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))")
+  static ::std::tuple<at::Tensor &,at::Tensor &> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
+  static ::std::tuple<at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
+};
+
+struct TORCH_API max_pool2d_with_indices {
+  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)")
+  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
+  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..abe2fee1680eb1b759327d9a4fb11d182c2ca87a
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
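+// The at::_ops struct below is the raw schema entry point; user code normally
+// goes through the at:: wrapper instead. A hypothetical sketch (MIOpen is the
+// ROCm backend, so this assumes HIP tensors):
+//
+//   at::Tensor y = at::miopen_convolution_relu(
+//       input, weight, /*bias=*/c10::nullopt,
+//       /*stride=*/{1, 1}, /*padding=*/{0, 0}, /*dilation=*/{1, 1}, /*groups=*/1);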
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API miopen_convolution_relu {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::miopen_convolution_relu")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1dbd18bb708c09ec070b4b558418d1090f7a27b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
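+// The wrapper declared below returns all layer gradients at once; a sketch of
+// unpacking the 7-tuple with structured bindings (the names are hypothetical):
+//
+//   auto [dx, dhx, dcx, dw1, dw2, db1, db2] =
+//       at::cpu::mkldnn_rnn_layer_backward(/* ...arguments as declared below... */);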
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> mkldnn_rnn_layer_backward(const at::Tensor & input, const at::Tensor & weight1, const at::Tensor & weight2, const at::Tensor & weight3, const at::Tensor & weight4, const at::Tensor & hx_, const at::Tensor & cx_tmp, const at::Tensor & output, const at::Tensor & hy_, const at::Tensor & cy_, const c10::optional<at::Tensor> & grad_output, const c10::optional<at::Tensor> & grad_hy, const c10::optional<at::Tensor> & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes, bool batch_first, const at::Tensor & workspace);
+
+} // namespace cpu
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/narrow_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/narrow_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..8cac29066855d85ff335ae02608fde7ec841711b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/narrow_native.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor narrow_symint(const at::Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length);
+TORCH_API at::Tensor narrow_nested_symint(const at::Tensor & self, int64_t dim, c10::SymInt start, c10::SymInt length);
+TORCH_API at::Tensor narrow_tensor_symint(const at::Tensor & self, int64_t dim, const at::Tensor & start, c10::SymInt length);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/one_hot.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/one_hot.h
new file mode 100644
index 0000000000000000000000000000000000000000..6617cfa53c19f659054645b55cd13feaed9dd610
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/one_hot.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/one_hot_ops.h>
+
+namespace at {
+
+
+// aten::one_hot(Tensor self, int num_classes=-1) -> Tensor
+inline at::Tensor one_hot(const at::Tensor & self, int64_t num_classes=-1) {
+    return at::_ops::one_hot::call(self, num_classes);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3e94b3734f279106580d9446f559ca3b441b7e11
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor reflection_pad3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding);
+TORCH_API at::Tensor reflection_pad3d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & reflection_pad3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding);
+TORCH_API at::Tensor & reflection_pad3d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding, at::Tensor & grad_input);
+TORCH_API at::Tensor & reflection_pad3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & reflection_pad3d_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reshape_as_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reshape_as_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b47e8ece13c8e74e6ad6b03e90700066f408887
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/reshape_as_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor reshape_as(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor reshape_as_nested(const at::Tensor & self, const at::Tensor & other);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc29dc0e5a7a0db12772f82e6d3a1998fe48a9fc
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor rnn_relu_cell(const at::Tensor & input, const at::Tensor & hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const c10::optional<at::Tensor> & b_ih={}, const c10::optional<at::Tensor> & b_hh={});
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/scatter_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/scatter_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..5cd290b15024f4befaf2b5ffec55399c27262e88
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/scatter_native.h
@@ -0,0 +1,34 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/scatter_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured_scatter_src_out : public at::meta::structured_scatter_src {
+void impl(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & src, const at::Tensor & out);
+};
+struct TORCH_API structured_scatter_value_out : public at::meta::structured_scatter_value {
+void impl(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value, const at::Tensor & out);
+};
+struct TORCH_API structured_scatter_reduce_out : public at::meta::structured_scatter_reduce {
+void impl(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, const at::Tensor & out);
+};
+struct TORCH_API structured_scatter_value_reduce_out : public at::meta::structured_scatter_value_reduce {
+void impl(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce, const at::Tensor & out);
+};
+TORCH_API at::Tensor scatter(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & src);
+TORCH_API at::Tensor scatter(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sigmoid_backward_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sigmoid_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..04728d6bcfe9a0bc4eda540d65d977c8417fe1a9
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sigmoid_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
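+// sigmoid_backward composes the gradient from the saved forward output rather
+// than the input: d/dx sigmoid(x) = y * (1 - y) with y = sigmoid(x). A
+// hypothetical sketch via the public op:
+//
+//   at::Tensor y = at::sigmoid(x);
+//   at::Tensor gx = at::sigmoid_backward(grad_output, /*output=*/y);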
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & output);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/softshrink_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/softshrink_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..caebd9a5cee5115a09a8fc5da811f14d691f24c0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/softshrink_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor softshrink(const at::Tensor & self, const at::Scalar & lambd=0.5);
+TORCH_API at::Tensor & softshrink_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & lambd=0.5);
+TORCH_API at::Tensor & softshrink_outf(const at::Tensor & self, const at::Scalar & lambd, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_airy_ai_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_airy_ai_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..cbb15b64c2b26514a8cfc53d1b15bd0e597e602c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_airy_ai_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
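+// A usage sketch for the CUDA wrappers declared below, assuming a CUDA float
+// tensor (the at::cuda:: entry point bypasses dispatch):
+//
+//   at::Tensor x = at::linspace(-5, 5, 100, at::device(at::kCUDA));
+//   at::Tensor ai = at::cuda::special_airy_ai(x);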
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor special_airy_ai(const at::Tensor & x);
+TORCH_API at::Tensor & special_airy_ai_out(at::Tensor & out, const at::Tensor & x);
+TORCH_API at::Tensor & special_airy_ai_outf(const at::Tensor & x, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_modified_bessel_i1_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_modified_bessel_i1_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2c24ec7d06c728b5345107f2d5e27751e230197
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_modified_bessel_i1_native.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/special_modified_bessel_i1_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured_special_modified_bessel_i1_out : public at::meta::structured_special_modified_bessel_i1 {
+void impl(const at::Tensor & self, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_ndtri_meta_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_ndtri_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3eb33af46206a3b4d12d2332a335fd95656e83d4
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_ndtri_meta_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
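+// The at::meta:: wrappers below only compute output metadata (shape, dtype,
+// device) without touching data, which is what the meta backend and shape
+// inference use. A hypothetical sketch:
+//
+//   at::Tensor p = at::empty({3}, at::device(at::kMeta));
+//   at::Tensor z = at::special_ndtri(p);  // carries shape/dtype, no storage data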
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor special_ndtri(const at::Tensor & self);
+TORCH_API at::Tensor & special_ndtri_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & special_ndtri_outf(const at::Tensor & self, at::Tensor & out);
+
+} // namespace meta
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_meta_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..7e8ebbc1e66ecf2b94b3af21df2f9021c83311fd
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_t_meta_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor special_shifted_chebyshev_polynomial_t(const at::Tensor & x, const at::Tensor & n);
+TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_t_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n);
+TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_t_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out);
+
+} // namespace meta
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_copy_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_copy_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbf78eb2a7a98599c38a3c7802c095d89d2d610f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_copy_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
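+// split_copy is the out-of-place counterpart of split: it returns freshly
+// allocated chunks instead of views. A hypothetical sketch via the public API:
+//
+//   at::Tensor t = at::arange(10);
+//   std::vector<at::Tensor> parts = at::split_copy(t, /*split_size=*/4, /*dim=*/0);
+//   // parts.size() == 3, with sizes 4, 4, 2 along dim 0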
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API split_copy_Tensor {
+  using schema = ::std::vector<at::Tensor> (const at::Tensor &, c10::SymInt, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::split_copy")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "split_copy.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]")
+  static ::std::vector<at::Tensor> call(const at::Tensor & self, c10::SymInt split_size, int64_t dim);
+  static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymInt split_size, int64_t dim);
+};
+
+struct TORCH_API split_copy_Tensor_out {
+  using schema = void (const at::Tensor &, c10::SymInt, int64_t, at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::split_copy")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor_out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "split_copy.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()")
+  static void call(const at::Tensor & self, c10::SymInt split_size, int64_t dim, at::TensorList out);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymInt split_size, int64_t dim, at::TensorList out);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sym_storage_offset_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sym_storage_offset_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..9828d6c5f78695d549761100739ffba887929ae5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sym_storage_offset_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API c10::SymInt sym_storage_offset(const at::Tensor & self);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/topk_meta_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/topk_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..484d95c32872e5d10ead57b5aea9ed548c53c272
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/topk_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
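+// A usage sketch for the (values, indices) pair returned by the declarations
+// below, via the public op (values are hypothetical):
+//
+//   at::Tensor x = at::randn({8});
+//   auto [values, indices] = at::topk(x, /*k=*/3);  // largest=true, sorted=true by default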
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> topk(const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> topk_symint(const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> topk_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> topk_outf(const at::Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> topk_symint_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> topk_symint_outf(const at::Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices);
+
+} // namespace meta
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/triu_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/triu_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..dac4e757fbccaf75d7094e5e88e8faefffa0b689
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/triu_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
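+// A minimal sketch of the public op these wrappers correspond to:
+//
+//   at::Tensor m = at::ones({3, 3});
+//   at::Tensor u = at::triu(m, /*diagonal=*/1);  // zeroes the main diagonal and below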
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor triu(const at::Tensor & self, int64_t diagonal=0);
+TORCH_API at::Tensor & triu_(at::Tensor & self, int64_t diagonal=0);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..680c288a15f2e89d5a3b8b51d8aaddf44f727d19
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/istream_adapter.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <istream>
+
+#include "c10/macros/Macros.h"
+#include "caffe2/serialize/read_adapter_interface.h"
+
+namespace caffe2 {
+namespace serialize {
+
+// this is a reader implemented by std::istream
+class TORCH_API IStreamAdapter final : public ReadAdapterInterface {
+ public:
+  C10_DISABLE_COPY_AND_ASSIGN(IStreamAdapter);
+  explicit IStreamAdapter(std::istream* istream);
+  size_t size() const override;
+  size_t read(uint64_t pos, void* buf, size_t n, const char* what = "")
+      const override;
+  ~IStreamAdapter() override;
+
+ private:
+  std::istream* istream_;
+  void validate(const char* what) const;
+};
+
+} // namespace serialize
+} // namespace caffe2
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a6b5b74a762e7c90b64a6f93717802f658dd164
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/caffe2/serialize/read_adapter_interface.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+#include "c10/macros/Macros.h"
+
+namespace caffe2 {
+namespace serialize {
+
+// this is the interface for the (file/stream/memory) reader in
+// PyTorchStreamReader. with this interface, we can extend the support
+// besides standard istream
+class TORCH_API ReadAdapterInterface {
+ public:
+  virtual size_t size() const = 0;
+  virtual size_t read(uint64_t pos, void* buf, size_t n, const char* what = "")
+      const = 0;
+  virtual ~ReadAdapterInterface();
+};
+
+} // namespace serialize
+} // namespace caffe2
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl.h
new file mode 100644
index 0000000000000000000000000000000000000000..bc74bf644f4b628018d7a9103ba63320abc466d5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl.h
@@ -0,0 +1,22 @@
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef DNNL_H
+#define DNNL_H
+
+#include "oneapi/dnnl/dnnl.h"
+
+#endif /* DNNL_H */
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_debug.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_debug.h
new file mode 100644
index 0000000000000000000000000000000000000000..5044971832bbbe56127920a527508b207a803eea
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_debug.h
@@ -0,0 +1,22 @@
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef DNNL_DEBUG_H
+#define DNNL_DEBUG_H
+
+#include "oneapi/dnnl/dnnl_debug.h"
+
+#endif /* DNNL_DEBUG_H */
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_threadpool.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_threadpool.h
new file mode 100644
index 0000000000000000000000000000000000000000..e27e584a65ed16740d4fde93da3a1a049dd111aa
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/dnnl_threadpool.h
@@ -0,0 +1,22 @@
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef DNNL_THREADPOOL_H
+#define DNNL_THREADPOOL_H
+
+#include "oneapi/dnnl/dnnl_threadpool.h"
+
+#endif /* DNNL_THREADPOOL_H */
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/nnpack.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/nnpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..97b5ff390076e9ab7ae91e67bfc0d78736aaeffd
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/nnpack.h
@@ -0,0 +1,659 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <pthreadpool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Status code for any NNPACK function call.
+ */ +enum nnp_status { + /** The call succeeded, and all output arguments now contain valid data. */ + nnp_status_success = 0, + /** NNPACK function was called with batch_size == 0. */ + nnp_status_invalid_batch_size = 2, + /** NNPACK function was called with channels == 0. */ + nnp_status_invalid_channels = 3, + /** NNPACK function was called with input_channels == 0. */ + nnp_status_invalid_input_channels = 4, + /** NNPACK function was called with output_channels == 0. */ + nnp_status_invalid_output_channels = 5, + /** NNPACK function was called with input_size.height == 0 or input_size.width == 0 */ + nnp_status_invalid_input_size = 10, + /** NNPACK function was called with input_stride.height == 0 or input_stride.width == 0 */ + nnp_status_invalid_input_stride = 11, + /** NNPACK function was called with input_padding not less than respective kernel (or pooling) size, i.e.: + * + * - input_padding.left >= kernel_size.width (>= pooling_size.width) + * - input_padding.right >= kernel_size.width (>= pooling_size.width) + * - input_padding.top >= kernel_size.height (>= pooling_size.height) + * - input_padding.bottom >= kernel_size.height (>= pooling_size.height) + */ + nnp_status_invalid_input_padding = 12, + /** NNPACK function was called with kernel_size.height == 0 or kernel_size.width == 0 */ + nnp_status_invalid_kernel_size = 13, + /** NNPACK function was called with pooling_size.height == 0 or pooling_size.width == 0 */ + nnp_status_invalid_pooling_size = 14, + /** NNPACK function was called with pooling_stride.height == 0 or pooling_stride.width == 0 */ + nnp_status_invalid_pooling_stride = 15, + /** NNPACK function was called with convolution algorithm not in nnp_convolution_algorithm enumeration */ + nnp_status_invalid_algorithm = 16, + /** NNPACK function was called with convolution transform strategy not in nnp_convolution_transform_strategy enum */ + nnp_status_invalid_transform_strategy = 17, + /** NNPACK function was called with output_subsampling.height == 0 or output_subsampling.width == 0 */ + nnp_status_invalid_output_subsampling = 13, + /** NNPACK function was called with activation not in nnp_activation enum */ + nnp_status_invalid_activation = 14, + /** NNPACK function was called with invalid activation parameters */ + nnp_status_invalid_activation_parameters = 15, + + /** NNPACK does not support the particular input size for the function */ + nnp_status_unsupported_input_size = 20, + /** NNPACK does not support the particular input stride for the function */ + nnp_status_unsupported_input_stride = 21, + /** NNPACK does not support the particular input padding for the function */ + nnp_status_unsupported_input_padding = 22, + /** NNPACK does not support the particular kernel size for the function */ + nnp_status_unsupported_kernel_size = 23, + /** NNPACK does not support the particular pooling size for the function */ + nnp_status_unsupported_pooling_size = 24, + /** NNPACK does not support the particular pooling stride for the function */ + nnp_status_unsupported_pooling_stride = 25, + /** NNPACK does not support the particular convolution algorithm for the function */ + nnp_status_unsupported_algorithm = 26, + /** NNPACK does not support the particular convolution transform strategy for the algorithm */ + nnp_status_unsupported_transform_strategy = 27, + /** NNPACK does not support the particular activation function for the function */ + nnp_status_unsupported_activation = 28, + /** NNPACK does not support the particular activation function parameters for the 
+    nnp_status_unsupported_activation_parameters = 29,
+
+    /** NNPACK function was called before the library was initialized */
+    nnp_status_uninitialized = 50,
+    /** NNPACK does not implement this function for the host CPU */
+    nnp_status_unsupported_hardware = 51,
+    /** NNPACK failed to allocate memory for temporary buffers */
+    nnp_status_out_of_memory = 52,
+    /** Scratch space buffer is too small */
+    nnp_status_insufficient_buffer = 53,
+    /** Scratch space buffer is not properly aligned */
+    nnp_status_misaligned_buffer = 54
+};
+
+/**
+ * @brief Activation applied after a convolutional or fully-connected layer.
+ */
+enum nnp_activation {
+    /** Identity activation f(x) := x, i.e. no transformation */
+    nnp_activation_identity = 0,
+    /** ReLU activation f(x) := max(0, x) */
+    nnp_activation_relu = 1,
+};
+
+/**
+ * @brief Algorithm for computing convolutional layers.
+ */
+enum nnp_convolution_algorithm {
+    /** Let NNPACK choose the algorithm depending on layer parameters */
+    nnp_convolution_algorithm_auto = 0,
+    /** Tiled convolution based on 2D Fourier transform with 8x8 blocks. Supports kernels up to 8x8. */
+    nnp_convolution_algorithm_ft8x8 = 1,
+    /** Tiled convolution based on 2D Fourier transform with 16x16 blocks. Supports kernels up to 16x16. */
+    nnp_convolution_algorithm_ft16x16 = 2,
+    /** Tiled convolution based on 2D Winograd transform F(3x3, 6x6) with 8x8 blocks. Supports only 3x3 kernels. */
+    nnp_convolution_algorithm_wt8x8 = 3,
+    /** Direct convolution via implicit GEMM. */
+    nnp_convolution_algorithm_implicit_gemm = 4,
+    /** Direct convolution implementation. */
+    nnp_convolution_algorithm_direct = 5,
+    /**
+     * Tiled convolution based on 2D Winograd transform F(3x3, 6x6) with 8x8 blocks in FP16.
+     * Supports only 3x3 kernels. Implemented only for new ARM processors (with NEON-HP),
+     * on non-supported processors falls back to nnp_convolution_algorithm_wt8x8.
+     */
+    nnp_convolution_algorithm_wt8x8_fp16 = 6,
+};
+
+enum nnp_convolution_transform_strategy {
+    nnp_convolution_transform_strategy_compute = 1,
+    nnp_convolution_transform_strategy_precompute = 2,
+    nnp_convolution_transform_strategy_reuse = 3
+};
+
+/* For backward compatibility */
+#define nnp_convolution_transform_strategy_block_based nnp_convolution_transform_strategy_compute
+#define nnp_convolution_transform_strategy_tuple_based nnp_convolution_transform_strategy_compute
+
+/**
+ * @brief Size of images, kernels, and pooling filters in NNPACK.
+ */
+struct nnp_size {
+    /** Width (horizontal size) of an image, kernel, or pooling filter. */
+    size_t width;
+    /** Height (vertical size) of an image, kernel, or pooling filter. */
+    size_t height;
+};
+
+/**
+ * @brief Padding of images in NNPACK.
+ */
+struct nnp_padding {
+    /** Padding above the image data */
+    size_t top;
+    /** Padding on the right of image data */
+    size_t right;
+    /** Padding below the image data */
+    size_t bottom;
+    /** Padding on the left of image data */
+    size_t left;
+};
+
+/**
+ * @brief Profiling information about time spent in different phases of a function call.
+ */
+struct nnp_profile {
+    /** Time spent inside the function call, in seconds. */
+    double total;
+    /** Time spent on transformation of the input or input gradient tensor, in seconds. */
+    double input_transform;
+    /** Time spent on transformation of the kernel or kernel gradient tensor, in seconds. */
+    double kernel_transform;
+    /** Time spent on transformation of the output or output gradient tensor, in seconds. */
+    double output_transform;
+    /** Time spent on multiplication-accumulation of transformed coefficients, in seconds. */
+    double block_multiplication;
+};
+
+enum nnp_status nnp_initialize(void);
+
+enum nnp_status nnp_deinitialize(void);
+
+/**
+ * @brief Computes output of a 2D convolutional layer from input and kernel tensors.
+ * @details This function targets training of convolutional neural networks and performs forward propagation.
+ *          It is optimized for moderate minibatch sizes (64-128) and can be inefficient on a small minibatch.
+ *          For minibatch size 1, use nnp_convolution_inference for optimal performance.
+ * @param algorithm The type of algorithm to use for convolution. Possible values are:
+ *
+ *    - nnp_convolution_algorithm_auto -- let the function choose the algorithm.
+ *    - nnp_convolution_algorithm_ft8x8 -- tiled convolution based on 2D Fourier transform with 8x8 blocks.
+ *      Supports kernels up to 8x8.
+ *    - nnp_convolution_algorithm_ft16x16 -- tiled convolution based on 2D Fourier transform with 16x16 blocks.
+ *      Supports kernels up to 16x16.
+ *    - nnp_convolution_algorithm_wt8x8 -- tiled convolution based on 2D Winograd transform F(3x3, 6x6).
+ *      Supports only 3x3 kernels.
+ *
+ * @param batch_size The number of images on the input and output of the convolutional layer.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input images.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output images.
+ * @param input_size Size of input images, excluding implicit zero-padding.
+ * @param input_padding Implicit zero-padding of input images.
+ * @param kernel_size Kernel size.
+ * @param[in] input A 4D tensor input[batch_size][input_channels][input_size.height][input_size.width].
+ * @param[in] kernel A 4D tensor kernel[output_channels][input_channels][kernel_size.height][kernel_size.width].
+ * @param[in] bias A 1D array bias[output_channels].
+ * @param[out] output A 4D tensor output[batch_size][output_channels][output_size.height][output_size.width] where
+ *                    output_size.height = (input_padding.top + input_size.height + input_padding.bottom) -
+ *                                         (kernel_size.height - 1)
+ *                    output_size.width = (input_padding.left + input_size.width + input_padding.right) -
+ *                                        (kernel_size.width - 1)
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ * @param[out] profile An optional pointer to profiling structure.
+ *                     If provided, the structure would record time spent in different phases of the computation.
+ */
+
+enum nnp_status nnp_convolution_output(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float* input,
+    const float* kernel,
+    const float* bias,
+    float* output,
+    void* workspace_buffer,
+    size_t* workspace_size,
+    enum nnp_activation activation,
+    const void* activation_parameters,
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile);
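+
+/* Editorial usage sketch (not part of upstream NNPACK): a minimal forward pass
+ * through nnp_convolution_output, using the simple variant that lets NNPACK
+ * manage scratch memory (workspace_buffer == NULL, workspace_size == NULL).
+ * All sizes are illustrative; input/kernel/bias/output are assumed to point at
+ * suitably sized float buffers.
+ *
+ *   nnp_initialize();
+ *   struct nnp_size input_size = { .width = 32, .height = 32 };
+ *   struct nnp_padding pad = { .top = 1, .right = 1, .bottom = 1, .left = 1 };
+ *   struct nnp_size kernel_size = { .width = 3, .height = 3 };
+ *   enum nnp_status status = nnp_convolution_output(
+ *       nnp_convolution_algorithm_auto,
+ *       64, 16, 32,                // batch_size, input_channels, output_channels
+ *       input_size, pad, kernel_size,
+ *       input, kernel, bias, output,
+ *       NULL, NULL,                // let NNPACK allocate scratch memory
+ *       nnp_activation_identity, NULL,
+ *       NULL,                      // run on the caller thread
+ *       NULL);                     // no profiling
+ *   if (status != nnp_status_success) { / * handle the error * / }
+ *   nnp_deinitialize();
+ */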
+
+/**
+ * @brief Computes gradient of input of a 2D convolutional layer from gradient of output and kernel tensors.
+ * @details This function targets training of convolutional neural networks and performs backward propagation.
+ *          It is optimized for moderate minibatch sizes (64-128) and can be inefficient on a small minibatch.
+ * @param algorithm The type of algorithm to use for convolution. Possible values are:
+ *
+ *    - nnp_convolution_algorithm_auto -- let the function choose the algorithm.
+ *    - nnp_convolution_algorithm_ft8x8 -- tiled convolution based on 2D Fourier transform with 8x8 blocks.
+ *      Supports kernels up to 8x8.
+ *    - nnp_convolution_algorithm_ft16x16 -- tiled convolution based on 2D Fourier transform with 16x16 blocks.
+ *      Supports kernels up to 16x16.
+ *    - nnp_convolution_algorithm_wt8x8 -- tiled convolution based on 2D Winograd transform F(3x3, 6x6).
+ *      Supports only 3x3 kernels.
+ *
+ * @param batch_size The number of images (and their gradients) on the input and output of the convolutional layer.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input images (and gradients).
+ * @param output_channels The number of channels (AKA features, dimensions) in the output images (and gradients).
+ * @param input_size Size of input images and their gradients, excluding implicit zero-padding.
+ * @param input_padding Implicit zero-padding of input images.
+ * @param kernel_size Kernel size.
+ * @param[in] grad_output A 4D tensor grad_output[batch_size][output_channels][output_size.height][output_size.width]
+ *                        where
+ *                        output_size.height = (input_padding.top + input_size.height + input_padding.bottom) -
+ *                                             (kernel_size.height - 1)
+ *                        output_size.width = (input_padding.left + input_size.width + input_padding.right) -
+ *                                            (kernel_size.width - 1)
+ * @param[in] kernel A 4D tensor kernel[output_channels][input_channels][kernel_size.height][kernel_size.width].
+ * @param[out] grad_input A 4D tensor grad_input[batch_size][input_channels][input_size.height][input_size.width].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ * @param[out] profile An optional pointer to profiling structure.
+ *                     If provided, the structure would record time spent in different phases of the computation.
+ */
+enum nnp_status nnp_convolution_input_gradient(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float* grad_output,
+    const float* kernel,
+    float* grad_input,
+    void* workspace_buffer,
+    size_t* workspace_size,
+    enum nnp_activation activation,
+    const void* activation_parameters,
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile);
+
+/**
+ * @brief Computes gradient of kernel of a 2D convolutional layer from gradient of output and input tensors.
+ * @details This function targets training of convolutional neural networks and performs backward propagation.
+ *          It is optimized for moderate minibatch sizes (64-128) and can be inefficient on a small minibatch.
+ * @param algorithm The type of algorithm to use for convolution. Possible values are:
+ *
+ *    - nnp_convolution_algorithm_auto -- let the function choose the algorithm.
+ *    - nnp_convolution_algorithm_ft8x8 -- tiled convolution based on 2D Fourier transform with 8x8 blocks.
+ *      Supports kernels up to 8x8.
+ *    - nnp_convolution_algorithm_ft16x16 -- tiled convolution based on 2D Fourier transform with 16x16 blocks.
+ *      Supports kernels up to 16x16.
+ *
+ * @param batch_size The number of images (and their gradients) on the input and output of the convolutional layer.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input images.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output images (and gradients).
+ * @param input_size Size of input images and their gradients, excluding implicit zero-padding.
+ * @param input_padding Implicit zero-padding of input images.
+ * @param kernel_size Kernel size.
+ * @param[in] input A 4D tensor input[batch_size][input_channels][input_size.height][input_size.width].
+ * @param[in] grad_output A 4D tensor grad_output[batch_size][output_channels][output_size.height][output_size.width]
+ *                        where
+ *                        output_size.height = (input_padding.top + input_size.height + input_padding.bottom) -
+ *                                             (kernel_size.height - 1)
+ *                        output_size.width = (input_padding.left + input_size.width + input_padding.right) -
+ *                                            (kernel_size.width - 1)
+ * @param[out] grad_kernel A 4D tensor
+ *                         grad_kernel[output_channels][input_channels][kernel_size.height][kernel_size.width].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ * @param[out] profile An optional pointer to profiling structure.
+ *                     If provided, the structure would record time spent in different phases of the computation.
+ */
+enum nnp_status nnp_convolution_kernel_gradient(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float* input,
+    const float* grad_output,
+    float* grad_kernel,
+    void* workspace_buffer,
+    size_t* workspace_size,
+    enum nnp_activation activation,
+    const void* activation_parameters,
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile);
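+
+/* Editorial note (not part of upstream NNPACK): for the unit-stride training
+ * functions above, the output size follows directly from the documented
+ * formula. A small helper, assuming "valid" convolution over the zero-padded
+ * input:
+ *
+ *   static inline struct nnp_size conv_output_size(
+ *       struct nnp_size input, struct nnp_padding pad, struct nnp_size kernel) {
+ *     struct nnp_size out;
+ *     out.height = (pad.top + input.height + pad.bottom) - (kernel.height - 1);
+ *     out.width = (pad.left + input.width + pad.right) - (kernel.width - 1);
+ *     return out;
+ *   }
+ *
+ * E.g. a 32x32 input with 1-pixel padding on every side and a 3x3 kernel
+ * yields a 32x32 output: (1 + 32 + 1) - (3 - 1) = 32.
+ */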
+
+/**
+ * @brief Computes output of a 2D convolutional layer for a single input image and a kernel tensor.
+ * @details This function targets prediction with convolutional neural networks and performs forward propagation.
+ * @param algorithm The type of algorithm to use for convolution. Possible values are:
+ *
+ *    - nnp_convolution_algorithm_auto -- let the function choose the algorithm.
+ *    - nnp_convolution_algorithm_ft8x8 -- tiled convolution based on 2D Fourier transform with 8x8 blocks.
+ *      Supports kernels up to 8x8.
+ *    - nnp_convolution_algorithm_ft16x16 -- tiled convolution based on 2D Fourier transform with 16x16 blocks.
+ *      Supports kernels up to 16x16.
+ *    - nnp_convolution_algorithm_wt8x8 -- tiled convolution based on 2D Winograd transform F(3x3, 6x6).
+ *      Supports only 3x3 kernels.
+ *
+ * @param transform_strategy A strategy that guides computation of kernel transforms coefficients.
+ *                           Possible values are:
+ *
+ *    - nnp_convolution_transform_strategy_block_based -- do multiplication-accumulations on blocks of transformed
+ *      coefficients.
+ *    - nnp_convolution_transform_strategy_tuple_based -- do multiplication-accumulations on tuples of transformed
+ *      coefficients.
+ *
+ * @param input_channels The number of channels (AKA features, dimensions) in the input image.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output image.
+ * @param input_size Size of input image, excluding implicit zero-padding.
+ * @param input_padding Implicit zero-padding of input image.
+ * @param kernel_size Kernel size.
+ * @param output_subsampling Subsample region for output, also known as convolution stride.
+ * @param[in] input A 3D tensor input[input_channels][input_size.height][input_size.width].
+ * @param[in] kernel A 4D tensor kernel[output_channels][input_channels][kernel_size.height][kernel_size.width].
+ * @param[in] bias A 1D array bias[output_channels].
+ * @param[out] output A 3D tensor output[output_channels][output_size.height][output_size.width] where
+ *                    output_size.height = (input_padding.top + input_size.height + input_padding.bottom) -
+ *                                         (kernel_size.height - 1)
+ *                    output_size.width = (input_padding.left + input_size.width + input_padding.right) -
+ *                                        (kernel_size.width - 1)
+ * @param[in] workspace_buffer Buffer for scratch memory used during computation. Buffer must be aligned on 64 bytes.
+ *                             If workspace_buffer is NULL and workspace_size is non-NULL, NNPACK would store the size
+ *                             of required workspace memory at the workspace_size location, and exit without
+ *                             computations.
+ *                             If workspace_buffer is NULL and workspace_size is NULL, NNPACK would allocate memory
+ *                             before and deallocate after this computation, potentially at significant runtime cost.
+ * @param[in,out] workspace_size Pointer to the size of workspace buffer.
+ *                               If workspace_buffer is NULL, NNPACK will write the size of required scratch memory to
+ *                               the location specified by this pointer.
+ *                               If workspace_buffer is non-NULL, NNPACK expects workspace_size to specify the size of
+ *                               the buffer, in bytes.
+ *                               If workspace_size is NULL, workspace_buffer must be NULL as well. In this case NNPACK
+ *                               would allocate memory before and deallocate after this computation, potentially at
+ *                               significant runtime cost.
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ * @param[out] profile An optional pointer to profiling structure.
+ *                     If provided, the structure would record time spent in different phases of the computation.
+ */
+enum nnp_status nnp_convolution_inference(
+    enum nnp_convolution_algorithm algorithm,
+    enum nnp_convolution_transform_strategy transform_strategy,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    struct nnp_size output_subsampling,
+    const float* input,
+    const float* kernel,
+    const float* bias,
+    float* output,
+    void* workspace_buffer,
+    size_t* workspace_size,
+    enum nnp_activation activation,
+    const void* activation_parameters,
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile);
+
+/**
+ * @brief Computes output of a fully connected layer from input and kernel matrices.
+ * @details This function targets training of convolutional neural networks and performs forward propagation.
+ *          It is optimized for moderate minibatch sizes (64-128) and can be inefficient on a small minibatch.
+ *          For minibatch size 1, use nnp_fully_connected_inference for optimal performance.
+ * @param batch_size The number of vectors on the input and output of the fully connected layer.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input matrix.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output matrix.
+ * @param[in] input A 2D matrix input[batch_size][input_channels].
+ * @param[in] kernel A 2D matrix kernel[output_channels][input_channels].
+ * @param[out] output A 2D matrix output[batch_size][output_channels].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_fully_connected_output(
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    const float input[],
+    const float kernel[],
+    float output[],
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile);
+
+/**
+ * @brief Computes output of a fully connected layer for a single input vector and a kernel matrix.
+ * @details This function targets prediction with convolutional neural networks and performs forward propagation.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input vector.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output vector.
+ * @param[in] input A 1D array input[input_channels] of FP32 elements.
+ * @param[in] kernel A 2D matrix kernel[output_channels][input_channels] of FP32 elements.
+ * @param[out] output A 1D array output[output_channels] of FP32 elements.
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_fully_connected_inference(
+    size_t input_channels,
+    size_t output_channels,
+    const float* input,
+    const float* kernel,
+    float* output,
+    pthreadpool_t threadpool);
+
+/**
+ * @brief Computes output of a fully connected layer for a single input vector and a kernel matrix.
+ * @details This function targets prediction with convolutional neural networks and performs forward propagation.
+ * @param input_channels The number of channels (AKA features, dimensions) in the input vector.
+ * @param output_channels The number of channels (AKA features, dimensions) in the output vector.
+ * @param[in] input A 1D array input[input_channels] of FP32 elements.
+ * @param[in] kernel A 2D matrix kernel[output_channels][input_channels] of FP16 (ARM alternative format) elements.
+ * @param[out] output A 1D array output[output_channels] of FP32 elements.
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_fully_connected_inference_f16f32(
+    size_t input_channels,
+    size_t output_channels,
+    const float* input,
+    const void* kernel,
+    float* output,
+    pthreadpool_t threadpool);
+
+/**
+ * @brief Computes output of a max-pooling layer for an input tensor.
+ * @details This function targets both prediction and training of convolutional neural networks and performs forward
+ *          propagation. It is optimized for both large and small minibatch sizes.
+ * @param batch_size The number of images on the input and output of the max-pooling layer.
+ * @param channels The number of channels (AKA features, dimensions) in both input and output images.
+ * @param input_size Size of input images, excluding implicit zero-padding.
+ * @param input_padding Implicit padding of input images. The padding pixels are ignored by the pooling filter, but
+ *                      affect the output size.
+ * @param pooling_size Size of the pooling filter. Only 2x2 filters are currently supported.
+ * @param pooling_stride Stride of the pooling filter. Only 2x2 strides are currently supported.
+ * @param[in] input A 4D tensor input[batch_size][channels][input_size.height][input_size.width].
+ * @param[out] output A 4D tensor output[batch_size][channels][output_size.height][output_size.width] where
+ *                    output_size.height = ceil(
+ *                        (input_padding.top + input_size.height + input_padding.bottom - pooling_size.height) /
+ *                        pooling_stride.height) + 1
+ *                    output_size.width = ceil(
+ *                        (input_padding.left + input_size.width + input_padding.right - pooling_size.width) /
+ *                        pooling_stride.width) + 1
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_max_pooling_output(
+    size_t batch_size,
+    size_t channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size pooling_size,
+    struct nnp_size pooling_stride,
+    const float input[],
+    float output[],
+    pthreadpool_t threadpool);
+
+/**
+ * @brief Computes output of a softmax layer for an input matrix.
+ * @details This function targets both prediction and training of convolutional neural networks and performs forward
+ *          propagation. It is optimized for both large and small minibatch sizes.
+ * @param batch_size The number of vectors on the input and output of the softmax layer.
+ * @param channels The number of channels (AKA features, dimensions) in both input and output vectors.
+ * @param[in] input A 2D matrix input[batch_size][channels].
+ * @param[out] output A 2D matrix output[batch_size][channels].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_softmax_output(
+    size_t batch_size,
+    size_t channels,
+    const float input[],
+    float output[],
+    pthreadpool_t threadpool);
+
+/**
+ * @brief Computes output of a rectified linear unit (ReLU) layer for an input matrix.
+ * @details This function targets both prediction and training of convolutional neural networks and performs forward
+ *          propagation. It is optimized for both large and small minibatch sizes.
+ * @param batch_size The number of vectors on the input and output of the ReLU layer.
+ * @param channels The number of channels (AKA features, dimensions) in both input and output matrices.
+ * @param[in] input A 2D matrix input[batch_size][channels].
+ * @param[out] output A 2D matrix output[batch_size][channels].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_relu_output(
+    size_t batch_size,
+    size_t channels,
+    const float input[],
+    float output[],
+    float negative_slope,
+    pthreadpool_t threadpool);
+
+/**
+ * @brief Computes gradient of input of a rectified linear unit (ReLU) layer from gradient of output and input matrices.
+ * @details This function targets training of convolutional neural networks and performs backward propagation.
+ *          It is optimized for both large and small minibatch sizes.
+ * @param batch_size The number of vectors on the input and output of the ReLU layer.
+ * @param channels The number of channels (AKA features, dimensions) in both input and output matrices.
+ * @param[in] grad_output A 2D matrix grad_output[batch_size][channels].
+ * @param[in] input A 2D matrix input[batch_size][channels].
+ * @param[out] grad_input A 2D matrix grad_input[batch_size][channels].
+ * @param threadpool A thread pool for parallelization of the computation.
+ *                   If threadpool is NULL, the computation would run on the caller thread without parallelization.
+ */
+enum nnp_status nnp_relu_input_gradient(
+    size_t batch_size,
+    size_t channels,
+    const float grad_output[],
+    const float input[],
+    float grad_input[],
+    float negative_slope,
+    pthreadpool_t threadpool);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#ifdef __cplusplus
+// Backward compatible implementations for nnp_convolution_*, if we are in C++
+// mode.
+inline enum nnp_status nnp_convolution_output(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float input[],
+    const float kernel[],
+    const float bias[],
+    float output[],
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile)
+{
+    return nnp_convolution_output(
+        algorithm,
+        batch_size, input_channels, output_channels,
+        input_size, input_padding, kernel_size,
+        input, kernel, bias, output,
+        NULL, NULL,
+        nnp_activation_identity, NULL, threadpool, profile);
+}
+
+inline enum nnp_status nnp_convolution_input_gradient(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float grad_output[],
+    const float kernel[],
+    float grad_input[],
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile)
+{
+    return nnp_convolution_input_gradient(
+        algorithm,
+        batch_size, input_channels, output_channels,
+        input_size, input_padding, kernel_size,
+        grad_output, kernel, grad_input,
+        NULL, NULL,
+        nnp_activation_identity, NULL, threadpool, profile);
+}
+
+inline enum nnp_status nnp_convolution_kernel_gradient(
+    enum nnp_convolution_algorithm algorithm,
+    size_t batch_size,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    const float input[],
+    const float grad_output[],
+    float grad_kernel[],
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile)
+{
+    return nnp_convolution_kernel_gradient(
+        algorithm,
+        batch_size, input_channels, output_channels,
+        input_size, input_padding, kernel_size,
+        input, grad_output, grad_kernel,
+        NULL, NULL,
+        nnp_activation_identity, NULL, threadpool, profile);
+}
+
+inline enum nnp_status nnp_convolution_inference(
+    enum nnp_convolution_algorithm algorithm,
+    enum nnp_convolution_transform_strategy transform_strategy,
+    size_t input_channels,
+    size_t output_channels,
+    struct nnp_size input_size,
+    struct nnp_padding input_padding,
+    struct nnp_size kernel_size,
+    struct nnp_size output_subsampling,
+    const float input[],
+    const float kernel[],
+    const float bias[],
+    float output[],
+    pthreadpool_t threadpool,
+    struct nnp_profile* profile) {
+    return nnp_convolution_inference(
+        algorithm, transform_strategy,
+        input_channels, output_channels,
+        input_size, input_padding, kernel_size, output_subsampling,
+        input, kernel, bias, output, NULL, NULL,
+        nnp_activation_identity, NULL,
+        threadpool, profile);
+}
+
+#endif // __cplusplus
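+
+/* Editorial usage sketch (not part of upstream NNPACK): the two-phase
+ * workspace protocol of nnp_convolution_inference documented above. The first
+ * call, with workspace_buffer == NULL, only reports the required scratch size;
+ * the second call runs the convolution with a caller-owned, 64-byte-aligned
+ * buffer. All other variables are assumed to be set up as in the parameter
+ * documentation.
+ *
+ *   size_t workspace_size = 0;
+ *   nnp_convolution_inference(nnp_convolution_algorithm_auto,
+ *       nnp_convolution_transform_strategy_compute,
+ *       input_channels, output_channels, input_size, input_padding,
+ *       kernel_size, output_subsampling, input, kernel, bias, output,
+ *       NULL, &workspace_size,              // query required scratch size
+ *       nnp_activation_identity, NULL, threadpool, NULL);
+ *   void* workspace = aligned_alloc(64, (workspace_size + 63) & ~(size_t) 63);
+ *   nnp_convolution_inference(nnp_convolution_algorithm_auto,
+ *       nnp_convolution_transform_strategy_compute,
+ *       input_channels, output_channels, input_size, input_padding,
+ *       kernel_size, output_subsampling, input, kernel, bias, output,
+ *       workspace, &workspace_size,         // compute with the caller buffer
+ *       nnp_activation_identity, NULL, threadpool, NULL);
+ *   free(workspace);
+ */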
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/psimd.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/psimd.h
new file mode 100644
index 0000000000000000000000000000000000000000..b7cb65d799c98931a73b3184511b1bd8c2b30ec0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/psimd.h
@@ -0,0 +1,1384 @@
+#pragma once
+#ifndef PSIMD_H
+#define PSIMD_H
+
+#if defined(__CUDA_ARCH__)
+    /* CUDA compiler */
+    #define PSIMD_INTRINSIC __forceinline__ __device__
+#elif defined(__OPENCL_VERSION__)
+    /* OpenCL compiler */
+    #define PSIMD_INTRINSIC inline static
+#elif defined(__INTEL_COMPILER)
+    /* Intel compiler, even on Windows */
+    #define PSIMD_INTRINSIC inline static __attribute__((__always_inline__))
+#elif defined(__GNUC__)
+    /* GCC-compatible compiler (gcc/clang/icc) */
+    #define PSIMD_INTRINSIC inline static __attribute__((__always_inline__))
+#elif defined(_MSC_VER)
+    /* MSVC-compatible compiler (cl/icl/clang-cl) */
+    #define PSIMD_INTRINSIC __forceinline static
+#elif defined(__cplusplus)
+    /* Generic C++ compiler */
+    #define PSIMD_INTRINSIC inline static
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+    /* Generic C99 compiler */
+    #define PSIMD_INTRINSIC inline static
+#else
+    /* Generic C compiler */
+    #define PSIMD_INTRINSIC static
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+    #if defined(__ARM_NEON__) || defined(__ARM_NEON)
+        #include <arm_neon.h>
+    #endif
+
+    #if defined(__SSE2__)
+        #include <emmintrin.h>
+    #endif
+
+    #if defined(__SSE3__)
+        #include <pmmintrin.h>
+    #endif
+
+    #if defined(__SSSE3__)
+        #include <tmmintrin.h>
+    #endif
+
+    #if defined(__SSE4_1__)
+        #include <smmintrin.h>
+    #endif
+
+    #if defined(__SSE4_2__)
+        #include <nmmintrin.h>
+    #endif
+
+    #if defined(__AVX__)
+        #include <immintrin.h>
+    #endif
+#elif defined(_MSC_VER)
+    #include <intrin.h>
+#endif
+
+#if defined(__cplusplus)
+    #define PSIMD_CXX_SYNTAX
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+    #define PSIMD_C11_SYNTAX
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+    #define PSIMD_C99_SYNTAX
+#else
+    #define PSIMD_C89_SYNTAX
+#endif
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+    #include <cstddef>
+    #include <cstdint>
+#elif !defined(__OPENCL_VERSION__)
+    #include <stddef.h>
+    #include <stdint.h>
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+    #define PSIMD_HAVE_F64 0
+    #define PSIMD_HAVE_F32 1
+    #define PSIMD_HAVE_U8 1
+    #define PSIMD_HAVE_S8 1
+    #define PSIMD_HAVE_U16 1
+    #define PSIMD_HAVE_S16 1
+    #define PSIMD_HAVE_U32 1
+    #define PSIMD_HAVE_S32 1
+    #define PSIMD_HAVE_U64 0
+    #define PSIMD_HAVE_S64 0
+
+    typedef int8_t psimd_s8 __attribute__((vector_size(16), aligned(1)));
+    typedef uint8_t psimd_u8 __attribute__((vector_size(16), aligned(1)));
+    typedef int16_t psimd_s16 __attribute__((vector_size(16), aligned(2)));
+    typedef uint16_t psimd_u16 __attribute__((vector_size(16), aligned(2)));
+    typedef int32_t psimd_s32 __attribute__((vector_size(16), aligned(4)));
+    typedef uint32_t psimd_u32 __attribute__((vector_size(16), aligned(4)));
+    typedef float psimd_f32 __attribute__((vector_size(16), aligned(4)));
+
+    typedef struct {
+        psimd_s8 lo;
+        psimd_s8 hi;
+    } psimd_s8x2;
+
+    typedef struct {
+        psimd_u8 lo;
+        psimd_u8 hi;
+    } psimd_u8x2;
+
+    typedef struct {
+        psimd_s16 lo;
+        psimd_s16 hi;
+    } psimd_s16x2;
+
+    typedef struct {
+        psimd_u16 lo;
+        psimd_u16 hi;
+    } psimd_u16x2;
+
+    typedef struct {
+        psimd_s32 lo;
+        psimd_s32 hi;
+    } psimd_s32x2;
+
+    typedef struct {
+        psimd_u32 lo;
+        psimd_u32 hi;
+    } psimd_u32x2;
+
+    typedef struct {
+        psimd_f32 lo;
+        psimd_f32 hi;
+    } psimd_f32x2;
+
+    /* Bit casts */
+    PSIMD_INTRINSIC psimd_u32x2 psimd_cast_s32x2_u32x2(psimd_s32x2 v) {
+        return (psimd_u32x2) { .lo = (psimd_u32) v.lo, .hi = (psimd_u32) v.hi };
+    }
+
+    PSIMD_INTRINSIC psimd_f32x2 psimd_cast_s32x2_f32x2(psimd_s32x2 v) {
+        return (psimd_f32x2) { .lo = (psimd_f32) v.lo, .hi = (psimd_f32) v.hi };
+    }
+
+    PSIMD_INTRINSIC psimd_s32x2 psimd_cast_u32x2_s32x2(psimd_u32x2 v) {
+ return (psimd_s32x2) { .lo = (psimd_s32) v.lo, .hi = (psimd_s32) v.hi }; + } + + PSIMD_INTRINSIC psimd_f32x2 psimd_cast_u32x2_f32x2(psimd_u32x2 v) { + return (psimd_f32x2) { .lo = (psimd_f32) v.lo, .hi = (psimd_f32) v.hi }; + } + + PSIMD_INTRINSIC psimd_s32x2 psimd_cast_f32x2_s32x2(psimd_f32x2 v) { + return (psimd_s32x2) { .lo = (psimd_s32) v.lo, .hi = (psimd_s32) v.hi }; + } + + PSIMD_INTRINSIC psimd_u32x2 psimd_cast_f32x2_u32x2(psimd_f32x2 v) { + return (psimd_u32x2) { .lo = (psimd_u32) v.lo, .hi = (psimd_u32) v.hi }; + } + + /* Swap */ + PSIMD_INTRINSIC void psimd_swap_s8(psimd_s8 a[1], psimd_s8 b[1]) { + const psimd_s8 new_a = *b; + const psimd_s8 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u8(psimd_u8 a[1], psimd_u8 b[1]) { + const psimd_u8 new_a = *b; + const psimd_u8 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_s16(psimd_s16 a[1], psimd_s16 b[1]) { + const psimd_s16 new_a = *b; + const psimd_s16 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u16(psimd_u16 a[1], psimd_u16 b[1]) { + const psimd_u16 new_a = *b; + const psimd_u16 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_s32(psimd_s32 a[1], psimd_s32 b[1]) { + const psimd_s32 new_a = *b; + const psimd_s32 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_u32(psimd_u32 a[1], psimd_u32 b[1]) { + const psimd_u32 new_a = *b; + const psimd_u32 new_b = *a; + *a = new_a; + *b = new_b; + } + + PSIMD_INTRINSIC void psimd_swap_f32(psimd_f32 a[1], psimd_f32 b[1]) { + const psimd_f32 new_a = *b; + const psimd_f32 new_b = *a; + *a = new_a; + *b = new_b; + } + + /* Zero-initialization */ + PSIMD_INTRINSIC psimd_s8 psimd_zero_s8(void) { + return (psimd_s8) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u8 psimd_zero_u8(void) { + return (psimd_u8) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_s16 psimd_zero_s16(void) { + return (psimd_s16) { 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u16 psimd_zero_u16(void) { + return (psimd_u16) { 0, 0, 0, 0, 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_zero_s32(void) { + return (psimd_s32) { 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_zero_u32(void) { + return (psimd_u32) { 0, 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_zero_f32(void) { + return (psimd_f32) { 0.0f, 0.0f, 0.0f, 0.0f }; + } + + /* Initialization to the same constant */ + PSIMD_INTRINSIC psimd_s8 psimd_splat_s8(int8_t c) { + return (psimd_s8) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u8 psimd_splat_u8(uint8_t c) { + return (psimd_u8) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_s16 psimd_splat_s16(int16_t c) { + return (psimd_s16) { c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u16 psimd_splat_u16(uint16_t c) { + return (psimd_u16) { c, c, c, c, c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_splat_s32(int32_t c) { + return (psimd_s32) { c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_splat_u32(uint32_t c) { + return (psimd_u32) { c, c, c, c }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat_f32(float c) { + return (psimd_f32) { c, c, c, c }; + } + + /* Load vector */ + PSIMD_INTRINSIC psimd_s8 psimd_load_s8(const void* address) { + return *((const psimd_s8*) address); + } + + PSIMD_INTRINSIC psimd_u8 psimd_load_u8(const void* address) { + return *((const psimd_u8*) address); + 
} + + PSIMD_INTRINSIC psimd_s16 psimd_load_s16(const void* address) { + return *((const psimd_s16*) address); + } + + PSIMD_INTRINSIC psimd_u16 psimd_load_u16(const void* address) { + return *((const psimd_u16*) address); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load_s32(const void* address) { + return *((const psimd_s32*) address); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load_u32(const void* address) { + return *((const psimd_u32*) address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_f32(const void* address) { + return *((const psimd_f32*) address); + } + + PSIMD_INTRINSIC psimd_s8 psimd_load_splat_s8(const void* address) { + return psimd_splat_s8(*((const int8_t*) address)); + } + + PSIMD_INTRINSIC psimd_u8 psimd_load_splat_u8(const void* address) { + return psimd_splat_u8(*((const uint8_t*) address)); + } + + PSIMD_INTRINSIC psimd_s16 psimd_load_splat_s16(const void* address) { + return psimd_splat_s16(*((const int16_t*) address)); + } + + PSIMD_INTRINSIC psimd_u16 psimd_load_splat_u16(const void* address) { + return psimd_splat_u16(*((const uint16_t*) address)); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load_splat_s32(const void* address) { + return psimd_splat_s32(*((const int32_t*) address)); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load_splat_u32(const void* address) { + return psimd_splat_u32(*((const uint32_t*) address)); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_splat_f32(const void* address) { + return psimd_splat_f32(*((const float*) address)); + } + + PSIMD_INTRINSIC psimd_s32 psimd_load1_s32(const void* address) { + return (psimd_s32) { *((const int32_t*) address), 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load1_u32(const void* address) { + return (psimd_u32) { *((const uint32_t*) address), 0, 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load1_f32(const void* address) { + return (psimd_f32) { *((const float*) address), 0.0f, 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load2_s32(const void* address) { + const int32_t* address_s32 = (const int32_t*) address; + return (psimd_s32) { address_s32[0], address_s32[1], 0, 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load2_u32(const void* address) { + const uint32_t* address_u32 = (const uint32_t*) address; + return (psimd_u32) { address_u32[0], address_u32[1], 0, 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load2_f32(const void* address) { + const float* address_f32 = (const float*) address; + return (psimd_f32) { address_f32[0], address_f32[1], 0.0f, 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load3_s32(const void* address) { + const int32_t* address_s32 = (const int32_t*) address; + return (psimd_s32) { address_s32[0], address_s32[1], address_s32[2], 0 }; + } + + PSIMD_INTRINSIC psimd_u32 psimd_load3_u32(const void* address) { + const uint32_t* address_u32 = (const uint32_t*) address; + return (psimd_u32) { address_u32[0], address_u32[1], address_u32[2], 0 }; + } + + PSIMD_INTRINSIC psimd_f32 psimd_load3_f32(const void* address) { + const float* address_f32 = (const float*) address; + return (psimd_f32) { address_f32[0], address_f32[1], address_f32[2], 0.0f }; + } + + PSIMD_INTRINSIC psimd_s32 psimd_load4_s32(const void* address) { + return psimd_load_s32(address); + } + + PSIMD_INTRINSIC psimd_u32 psimd_load4_u32(const void* address) { + return psimd_load_u32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load4_f32(const void* address) { + return psimd_load_f32(address); + } + + PSIMD_INTRINSIC psimd_f32 psimd_load_stride2_f32(const void* address) { + const psimd_f32 v0x1x = psimd_load_f32(address); + 
+        const psimd_f32 vx2x3 = psimd_load_f32((const float*) address + 3);
+        #if defined(__clang__)
+            return __builtin_shufflevector(v0x1x, vx2x3, 0, 2, 5, 7);
+        #else
+            return __builtin_shuffle(v0x1x, vx2x3, (psimd_s32) { 0, 2, 5, 7 });
+        #endif
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load1_stride2_f32(const void* address) {
+        return psimd_load1_f32(address);
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load2_stride2_f32(const void* address) {
+        const float* address_f32 = (const float*) address;
+        return (psimd_f32) { address_f32[0], address_f32[2], 0.0f, 0.0f };
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load3_stride2_f32(const void* address) {
+        const psimd_f32 v0x1x = psimd_load_f32(address);
+        /* The third stride-2 element lives at offset 4 (elements 0, 2, 4). */
+        const psimd_f32 v2zzz = psimd_load1_f32((const float*) address + 4);
+        #if defined(__clang__)
+            return __builtin_shufflevector(v0x1x, v2zzz, 0, 2, 4, 6);
+        #else
+            return __builtin_shuffle(v0x1x, v2zzz, (psimd_s32) { 0, 2, 4, 6 });
+        #endif
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load4_stride2_f32(const void* address) {
+        return psimd_load_stride2_f32(address);
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load_stride_f32(const void* address, size_t stride) {
+        const float* address0_f32 = (const float*) address;
+        const float* address1_f32 = address0_f32 + stride;
+        const float* address2_f32 = address1_f32 + stride;
+        const float* address3_f32 = address2_f32 + stride;
+        return (psimd_f32) { *address0_f32, *address1_f32, *address2_f32, *address3_f32 };
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load1_stride_f32(const void* address, size_t stride) {
+        return psimd_load1_f32(address);
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load2_stride_f32(const void* address, size_t stride) {
+        const float* address_f32 = (const float*) address;
+        return (psimd_f32) { address_f32[0], address_f32[stride], 0.0f, 0.0f };
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load3_stride_f32(const void* address, size_t stride) {
+        const float* address0_f32 = (const float*) address;
+        const float* address1_f32 = address0_f32 + stride;
+        const float* address2_f32 = address1_f32 + stride;
+        return (psimd_f32) { *address0_f32, *address1_f32, *address2_f32, 0.0f };
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_load4_stride_f32(const void* address, size_t stride) {
+        return psimd_load_stride_f32(address, stride);
+    }
+
+    /* Store vector */
+    PSIMD_INTRINSIC void psimd_store_s8(void* address, psimd_s8 value) {
+        *((psimd_s8*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_u8(void* address, psimd_u8 value) {
+        *((psimd_u8*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_s16(void* address, psimd_s16 value) {
+        *((psimd_s16*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_u16(void* address, psimd_u16 value) {
+        *((psimd_u16*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_s32(void* address, psimd_s32 value) {
+        *((psimd_s32*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_u32(void* address, psimd_u32 value) {
+        *((psimd_u32*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store_f32(void* address, psimd_f32 value) {
+        *((psimd_f32*) address) = value;
+    }
+
+    PSIMD_INTRINSIC void psimd_store1_s32(void* address, psimd_s32 value) {
+        *((int32_t*) address) = value[0];
+    }
+
+    PSIMD_INTRINSIC void psimd_store1_u32(void* address, psimd_u32 value) {
+        *((uint32_t*) address) = value[0];
+    }
+
+    PSIMD_INTRINSIC void psimd_store1_f32(void* address, psimd_f32 value) {
+        *((float*) address) = value[0];
+    }
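+
+    /* Editorial usage sketch (not part of upstream psimd): the strided
+     * loads/stores above gather and scatter one column of a row-major matrix.
+     * E.g., for a 4x4 float matrix m, column 1 is m[0][1], m[1][1], m[2][1],
+     * m[3][1], i.e. four floats a stride of 4 apart:
+     *
+     *   float m[4][4];
+     *   psimd_f32 col1 = psimd_load_stride_f32(&m[0][1], 4);
+     *   col1 = col1 + psimd_splat_f32(1.0f);  // GNU vector-extension math
+     *   psimd_store_stride_f32(&m[0][1], 4, col1);
+     */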
+
+    PSIMD_INTRINSIC void psimd_store2_s32(void* address, psimd_s32 value) {
+        int32_t* address_s32 = (int32_t*) address;
+        address_s32[0] = value[0];
+        address_s32[1] = value[1];
+    }
+
+    PSIMD_INTRINSIC void psimd_store2_u32(void* address, psimd_u32 value) {
+        uint32_t* address_u32 = (uint32_t*) address;
+        address_u32[0] = value[0];
+        address_u32[1] = value[1];
+    }
+
+    PSIMD_INTRINSIC void psimd_store2_f32(void* address, psimd_f32 value) {
+        float* address_f32 = (float*) address;
+        address_f32[0] = value[0];
+        address_f32[1] = value[1];
+    }
+
+    PSIMD_INTRINSIC void psimd_store3_s32(void* address, psimd_s32 value) {
+        int32_t* address_s32 = (int32_t*) address;
+        address_s32[0] = value[0];
+        address_s32[1] = value[1];
+        address_s32[2] = value[2];
+    }
+
+    PSIMD_INTRINSIC void psimd_store3_u32(void* address, psimd_u32 value) {
+        uint32_t* address_u32 = (uint32_t*) address;
+        address_u32[0] = value[0];
+        address_u32[1] = value[1];
+        address_u32[2] = value[2];
+    }
+
+    PSIMD_INTRINSIC void psimd_store3_f32(void* address, psimd_f32 value) {
+        float* address_f32 = (float*) address;
+        address_f32[0] = value[0];
+        address_f32[1] = value[1];
+        address_f32[2] = value[2];
+    }
+
+    PSIMD_INTRINSIC void psimd_store4_s32(void* address, psimd_s32 value) {
+        psimd_store_s32(address, value);
+    }
+
+    PSIMD_INTRINSIC void psimd_store4_u32(void* address, psimd_u32 value) {
+        psimd_store_u32(address, value);
+    }
+
+    PSIMD_INTRINSIC void psimd_store4_f32(void* address, psimd_f32 value) {
+        psimd_store_f32(address, value);
+    }
+
+    PSIMD_INTRINSIC void psimd_store_stride_f32(void* address, size_t stride, psimd_f32 value) {
+        float* address0_f32 = (float*) address;
+        float* address1_f32 = address0_f32 + stride;
+        float* address2_f32 = address1_f32 + stride;
+        float* address3_f32 = address2_f32 + stride;
+        *address0_f32 = value[0];
+        *address1_f32 = value[1];
+        *address2_f32 = value[2];
+        *address3_f32 = value[3];
+    }
+
+    PSIMD_INTRINSIC void psimd_store1_stride_f32(void* address, size_t stride, psimd_f32 value) {
+        psimd_store1_f32(address, value);
+    }
+
+    PSIMD_INTRINSIC void psimd_store2_stride_f32(void* address, size_t stride, psimd_f32 value) {
+        float* address_f32 = (float*) address;
+        address_f32[0] = value[0];
+        address_f32[stride] = value[1];
+    }
+
+    PSIMD_INTRINSIC void psimd_store3_stride_f32(void* address, size_t stride, psimd_f32 value) {
+        float* address0_f32 = (float*) address;
+        float* address1_f32 = address0_f32 + stride;
+        float* address2_f32 = address1_f32 + stride;
+        *address0_f32 = value[0];
+        *address1_f32 = value[1];
+        *address2_f32 = value[2];
+    }
+
+    /* Vector addition */
+    PSIMD_INTRINSIC psimd_s8 psimd_add_s8(psimd_s8 a, psimd_s8 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_u8 psimd_add_u8(psimd_u8 a, psimd_u8 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_s16 psimd_add_s16(psimd_s16 a, psimd_s16 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_u16 psimd_add_u16(psimd_u16 a, psimd_u16 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_s32 psimd_add_s32(psimd_s32 a, psimd_s32 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_u32 psimd_add_u32(psimd_u32 a, psimd_u32 b) {
+        return a + b;
+    }
+
+    PSIMD_INTRINSIC psimd_f32 psimd_add_f32(psimd_f32 a, psimd_f32 b) {
+        #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__)
+            return (psimd_f32) vaddq_f32((float32x4_t) a, (float32x4_t) b);
+        #else
+            return a + b;
+        #endif
+    }
+
+    /* Vector subtraction */
+    PSIMD_INTRINSIC psimd_s8 psimd_sub_s8(psimd_s8 a, psimd_s8 b) {
+        return a - b;
+    }
+
+    PSIMD_INTRINSIC psimd_u8 psimd_sub_u8(psimd_u8 a, psimd_u8 b) {
+        return a - b;
+    }
+
+    PSIMD_INTRINSIC psimd_s16
psimd_sub_s16(psimd_s16 a, psimd_s16 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_u16 psimd_sub_u16(psimd_u16 a, psimd_u16 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_s32 psimd_sub_s32(psimd_s32 a, psimd_s32 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_u32 psimd_sub_u32(psimd_u32 a, psimd_u32 b) { + return a - b; + } + + PSIMD_INTRINSIC psimd_f32 psimd_sub_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__) + return (psimd_f32) vsubq_f32((float32x4_t) a, (float32x4_t) b); + #else + return a - b; + #endif + } + + /* Vector multiplication */ + PSIMD_INTRINSIC psimd_s8 psimd_mul_s8(psimd_s8 a, psimd_s8 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u8 psimd_mul_u8(psimd_u8 a, psimd_u8 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_s16 psimd_mul_s16(psimd_s16 a, psimd_s16 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u16 psimd_mul_u16(psimd_u16 a, psimd_u16 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_s32 psimd_mul_s32(psimd_s32 a, psimd_s32 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_u32 psimd_mul_u32(psimd_u32 a, psimd_u32 b) { + return a * b; + } + + PSIMD_INTRINSIC psimd_f32 psimd_mul_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) && !defined(__FAST_MATH__) + return (psimd_f32) vmulq_f32((float32x4_t) a, (float32x4_t) b); + #else + return a * b; + #endif + } + + /* Quasi-Fused Multiply-Add */ + PSIMD_INTRINSIC psimd_f32 psimd_qfma_f32(psimd_f32 a, psimd_f32 b, psimd_f32 c) { + #if defined(__aarch64__) || defined(__ARM_NEON__) && defined(__ARM_FEATURE_FMA) + return (psimd_f32) vfmaq_f32((float32x4_t) a, (float32x4_t) b, (float32x4_t) c); + #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA__) + return (psimd_f32) _mm_fmadd_ps((__m128) b, (__m128) c, (__m128) a); + #elif (defined(__x86_64__) || defined(__i386__) || defined(__i686__)) && defined(__FMA4__) + return (psimd_f32) _mm_macc_ps((__m128) b, (__m128) c, (__m128) a); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) && PSIMD_ENABLE_WASM_QFMA + return (psimd_f32) __builtin_wasm_qfma_f32x4(a, b, c); + #else + return a + b * c; + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_div_f32(psimd_f32 a, psimd_f32 b) { + return a / b; + } + + /* Vector and */ + PSIMD_INTRINSIC psimd_f32 psimd_andmask_f32(psimd_s32 mask, psimd_f32 v) { + return (psimd_f32) (mask & (psimd_s32) v); + } + + /* Vector and-not */ + PSIMD_INTRINSIC psimd_f32 psimd_andnotmask_f32(psimd_s32 mask, psimd_f32 v) { + return (psimd_f32) (~mask & (psimd_s32) v); + } + + /* Vector blend */ + PSIMD_INTRINSIC psimd_s8 psimd_blend_s8(psimd_s8 mask, psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vbslq_s8((uint8x16_t) mask, (int8x16_t) a, (int8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s8) __builtin_wasm_bitselect(a, b, mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_blend_u8(psimd_s8 mask, psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vbslq_u8((uint8x16_t) mask, (uint8x16_t) a, (uint8x16_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u8) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u8) ((mask & (psimd_s8) a) | (~mask & (psimd_s8) b)); + #endif + } + + PSIMD_INTRINSIC psimd_s16 
psimd_blend_s16(psimd_s16 mask, psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vbslq_s16((uint16x8_t) mask, (int16x8_t) a, (int16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s16) __builtin_wasm_bitselect(a, b, mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_blend_u16(psimd_s16 mask, psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vbslq_u16((uint16x8_t) mask, (uint16x8_t) a, (uint16x8_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u16) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u16) ((mask & (psimd_s16) a) | (~mask & (psimd_s16) b)); + #endif + } + + PSIMD_INTRINSIC psimd_s32 psimd_blend_s32(psimd_s32 mask, psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vbslq_s32((uint32x4_t) mask, (int32x4_t) a, (int32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_s32) __builtin_wasm_bitselect(a, b, mask); + #else + return (mask & a) | (~mask & b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_blend_u32(psimd_s32 mask, psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vbslq_u32((uint32x4_t) mask, (uint32x4_t) a, (uint32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_u32) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_u32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b)); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_blend_f32(psimd_s32 mask, psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vbslq_f32((uint32x4_t) mask, (float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return (psimd_f32) __builtin_wasm_bitselect(a, b, mask); + #else + return (psimd_f32) ((mask & (psimd_s32) a) | (~mask & (psimd_s32) b)); + #endif + } + + /* Vector blend on sign */ + PSIMD_INTRINSIC psimd_s8 psimd_signblend_s8(psimd_s8 x, psimd_s8 a, psimd_s8 b) { + return psimd_blend_s8(x >> psimd_splat_s8(7), a, b); + } + + PSIMD_INTRINSIC psimd_u8 psimd_signblend_u8(psimd_s8 x, psimd_u8 a, psimd_u8 b) { + return psimd_blend_u8((x >> psimd_splat_s8(7)), a, b); + } + + PSIMD_INTRINSIC psimd_s16 psimd_signblend_s16(psimd_s16 x, psimd_s16 a, psimd_s16 b) { + return psimd_blend_s16(x >> psimd_splat_s16(15), a, b); + } + + PSIMD_INTRINSIC psimd_u16 psimd_signblend_u16(psimd_s16 x, psimd_u16 a, psimd_u16 b) { + return psimd_blend_u16((x >> psimd_splat_s16(15)), a, b); + } + + PSIMD_INTRINSIC psimd_s32 psimd_signblend_s32(psimd_s32 x, psimd_s32 a, psimd_s32 b) { + return psimd_blend_s32(x >> psimd_splat_s32(31), a, b); + } + + PSIMD_INTRINSIC psimd_u32 psimd_signblend_u32(psimd_s32 x, psimd_u32 a, psimd_u32 b) { + return psimd_blend_u32((x >> psimd_splat_s32(31)), a, b); + } + + PSIMD_INTRINSIC psimd_f32 psimd_signblend_f32(psimd_f32 x, psimd_f32 a, psimd_f32 b) { + const psimd_s32 mask = (psimd_s32) x >> psimd_splat_s32(31); + return psimd_blend_f32(mask, a, b); + } + + /* Vector absolute value */ + PSIMD_INTRINSIC psimd_f32 psimd_abs_f32(psimd_f32 v) { + const psimd_s32 mask = (psimd_s32) psimd_splat_f32(-0.0f); + return (psimd_f32) ((psimd_s32) v & ~mask); + } + + /* Vector negation */ + PSIMD_INTRINSIC psimd_f32 
psimd_neg_f32(psimd_f32 v) { + const psimd_s32 mask = (psimd_s32) psimd_splat_f32(-0.0f); + return (psimd_f32) ((psimd_s32) v ^ mask); + } + + /* Vector maximum */ + PSIMD_INTRINSIC psimd_s8 psimd_max_s8(psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vmaxq_s8((int8x16_t) a, (int8x16_t) b); + #else + return psimd_blend_s8(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_max_u8(psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vmaxq_u8((uint8x16_t) a, (uint8x16_t) b); + #else + return psimd_blend_u8(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s16 psimd_max_s16(psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vmaxq_s16((int16x8_t) a, (int16x8_t) b); + #else + return psimd_blend_s16(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_max_u16(psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vmaxq_u16((uint16x8_t) a, (uint16x8_t) b); + #else + return psimd_blend_u16(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s32 psimd_max_s32(psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vmaxq_s32((int32x4_t) a, (int32x4_t) b); + #else + return psimd_blend_s32(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_max_u32(psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vmaxq_u32((uint32x4_t) a, (uint32x4_t) b); + #else + return psimd_blend_u32(a > b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_max_f32(psimd_f32 a, psimd_f32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vmaxq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_max_f32x4(a, b); + #else + return psimd_blend_f32(a > b, a, b); + #endif + } + + /* Vector minimum */ + PSIMD_INTRINSIC psimd_s8 psimd_min_s8(psimd_s8 a, psimd_s8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s8) vminq_s8((int8x16_t) a, (int8x16_t) b); + #else + return psimd_blend_s8(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u8 psimd_min_u8(psimd_u8 a, psimd_u8 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u8) vminq_u8((uint8x16_t) a, (uint8x16_t) b); + #else + return psimd_blend_u8(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s16 psimd_min_s16(psimd_s16 a, psimd_s16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s16) vminq_s16((int16x8_t) a, (int16x8_t) b); + #else + return psimd_blend_s16(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u16 psimd_min_u16(psimd_u16 a, psimd_u16 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u16) vminq_u16((uint16x8_t) a, (uint16x8_t) b); + #else + return psimd_blend_u16(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_s32 psimd_min_s32(psimd_s32 a, psimd_s32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_s32) vminq_s32((int32x4_t) a, (int32x4_t) b); + #else + return psimd_blend_s32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_u32 psimd_min_u32(psimd_u32 a, psimd_u32 b) { + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_u32) vminq_u32((uint32x4_t) a, (uint32x4_t) b); + #else + return psimd_blend_u32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_min_f32(psimd_f32 a, psimd_f32 b) { + #if 
defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vminq_f32((float32x4_t) a, (float32x4_t) b); + #elif defined(__wasm__) && defined(__wasm_simd128__) && defined(__clang__) + return __builtin_wasm_min_f32x4(a, b); + #else + return psimd_blend_f32(a < b, a, b); + #endif + } + + PSIMD_INTRINSIC psimd_f32 psimd_cvt_s32_f32(psimd_s32 v) { + #if defined(__clang__) + return __builtin_convertvector(v, psimd_f32); + #elif defined(__ARM_NEON__) || defined(__ARM_NEON) + return (psimd_f32) vcvtq_f32_s32((int32x4_t) v); + #elif defined(__SSE2__) + return (psimd_f32) _mm_cvtepi32_ps((__m128i) v); + #else + return (psimd_f32) { (float) v[0], (float) v[1], (float) v[2], (float) v[3] }; + #endif + } + + /* Broadcast vector element */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_f32 psimd_splat0_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 0, 0, 0, 0); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat1_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 1, 1, 1, 1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat2_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 2, 2, 2, 2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat3_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 3, 3, 3, 3); + } + #else + PSIMD_INTRINSIC psimd_f32 psimd_splat0_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 0, 0, 0, 0 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat1_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 1, 1, 1, 1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat2_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 2, 2, 2, 2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_splat3_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 3, 3, 3 }); + } + #endif + + /* Reversal of vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s8 psimd_reverse_s8(psimd_s8 v) { + return __builtin_shufflevector(v, v, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u8 psimd_reverse_u8(psimd_u8 v) { + return __builtin_shufflevector(v, v, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_s16 psimd_reverse_s16(psimd_s16 v) { + return __builtin_shufflevector(v, v, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u16 psimd_reverse_u16(psimd_u16 v) { + return __builtin_shufflevector(v, v, 7, 6, 5, 4, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_s32 psimd_reverse_s32(psimd_s32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_u32 psimd_reverse_u32(psimd_u32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + + PSIMD_INTRINSIC psimd_f32 psimd_reverse_f32(psimd_f32 v) { + return __builtin_shufflevector(v, v, 3, 2, 1, 0); + } + #else + PSIMD_INTRINSIC psimd_s8 psimd_reverse_s8(psimd_s8 v) { + return __builtin_shuffle(v, (psimd_s8) { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_reverse_u8(psimd_u8 v) { + return __builtin_shuffle(v, (psimd_s8) { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_reverse_s16(psimd_s16 v) { + return __builtin_shuffle(v, (psimd_s16) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_reverse_u16(psimd_u16 v) { + return __builtin_shuffle(v, (psimd_s16) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_reverse_s32(psimd_s32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_reverse_u32(psimd_u32 v) { + 
return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_reverse_f32(psimd_f32 v) { + return __builtin_shuffle(v, (psimd_s32) { 3, 2, 1, 0 }); + } + #endif + + /* Interleaving of vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s16 psimd_interleave_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); + } + + PSIMD_INTRINSIC psimd_s16 psimd_interleave_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 4+0, 1, 4+1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 2, 4+2, 3, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s16 psimd_interleave_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_interleave_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_interleave_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_interleave_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_interleave_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 4+0, 1, 4+1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_interleave_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 4+2, 3, 4+3 }); + } + #endif + + /* Concatenation of low/high vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s16 psimd_concat_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 1, 2, 3, 8+0, 
8+1, 8+2, 8+3); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 1, 4+0, 4+1); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 2, 3, 4+2, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s16 psimd_concat_lo_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_hi_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_lo_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_hi_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_lo_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_hi_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_lo_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_hi_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_lo_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 1, 4+0, 4+1 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_hi_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 2, 3, 4+2, 4+3 }); + } + #endif + + /* Concatenation of even/odd vector elements */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_s8 psimd_concat_even_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shufflevector(a, b, + 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14); + } + + PSIMD_INTRINSIC psimd_s8 psimd_concat_odd_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shufflevector(a, b, + 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_even_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shufflevector(a, b, + 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14); + } + + 
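/* Editorial note (not in the original psimd.h): a worked example of the even/odd concatenation above.
+  * For a = { 0, 1, 2, ..., 15 } and b = { 16, 17, 18, ..., 31 }:
+  *   psimd_concat_even_s8(a, b) == { 0, 2, 4, ..., 14, 16, 18, ..., 30 }
+  *   psimd_concat_odd_s8(a, b)  == { 1, 3, 5, ..., 15, 17, 19, ..., 31 }
+  * i.e. the even/odd pair deinterleaves two vectors, undoing the interleave_lo/interleave_hi
+  * pair defined earlier for the types where both operations are provided. */
+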
PSIMD_INTRINSIC psimd_u8 psimd_concat_odd_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shufflevector(a, b, + 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_even_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_odd_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shufflevector(a, b, 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_even_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_odd_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shufflevector(a, b, 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_even_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_odd_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_even_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_odd_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_even_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 0, 2, 4+0, 4+2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_odd_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shufflevector(a, b, 1, 3, 4+1, 4+3); + } + #else + PSIMD_INTRINSIC psimd_s8 psimd_concat_even_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14 }); + } + + PSIMD_INTRINSIC psimd_s8 psimd_concat_odd_s8(psimd_s8 a, psimd_s8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_even_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 0, 2, 4, 6, 8, 10, 12, 14, 16+0, 16+2, 16+4, 16+6, 16+8, 16+10, 16+12, 16+14 }); + } + + PSIMD_INTRINSIC psimd_u8 psimd_concat_odd_u8(psimd_u8 a, psimd_u8 b) { + return __builtin_shuffle(a, b, + (psimd_s8) { 1, 3, 5, 7, 9, 11, 13, 15, 16+1, 16+3, 16+5, 16+7, 16+9, 16+11, 16+13, 16+15 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_even_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6 }); + } + + PSIMD_INTRINSIC psimd_s16 psimd_concat_odd_s16(psimd_s16 a, psimd_s16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_even_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 0, 2, 4, 6, 8+0, 8+2, 8+4, 8+6 }); + } + + PSIMD_INTRINSIC psimd_u16 psimd_concat_odd_u16(psimd_u16 a, psimd_u16 b) { + return __builtin_shuffle(a, b, (psimd_s16) { 1, 3, 5, 7, 8+1, 8+3, 8+5, 8+7 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_even_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_s32 psimd_concat_odd_s32(psimd_s32 a, psimd_s32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_even_u32(psimd_u32 a, psimd_u32 b) { + return 
__builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_u32 psimd_concat_odd_u32(psimd_u32 a, psimd_u32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_even_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 0, 2, 4+0, 4+2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_concat_odd_f32(psimd_f32 a, psimd_f32 b) { + return __builtin_shuffle(a, b, (psimd_s32) { 1, 3, 4+1, 4+3 }); + } + #endif + + /* Vector reduce */ + #if defined(__clang__) + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shufflevector(v, v, 2, 3, 0, 1); + return temp + __builtin_shufflevector(temp, temp, 1, 0, 3, 2); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shufflevector(v, v, 2, 3, 0, 1)); + return psimd_max_f32(temp, __builtin_shufflevector(temp, temp, 1, 0, 3, 2)); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shufflevector(v, v, 2, 3, 0, 1)); + return psimd_min_f32(temp, __builtin_shufflevector(temp, temp, 1, 0, 3, 2)); + } + + PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shufflevector(v, v, 2, 3, -1, -1); + const psimd_f32 result = temp + __builtin_shufflevector(temp, temp, 1, -1, -1, -1); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shufflevector(v, v, 2, 3, -1, -1)); + const psimd_f32 result = psimd_max_f32(temp, __builtin_shufflevector(temp, temp, 1, -1, -1, -1)); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shufflevector(v, v, 2, 3, -1, -1)); + const psimd_f32 result = psimd_min_f32(temp, __builtin_shufflevector(temp, temp, 1, -1, -1, -1)); + return result[0]; + } + #else + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_sum_f32(psimd_f32 v) { + const psimd_f32 temp = v + __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 }); + return temp + __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 }); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_max_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_max_f32(v, __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 })); + return psimd_max_f32(temp, __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 })); + } + + PSIMD_INTRINSIC psimd_f32 psimd_allreduce_min_f32(psimd_f32 v) { + const psimd_f32 temp = psimd_min_f32(v, __builtin_shuffle(v, (psimd_s32) { 2, 3, 0, 1 })); + return psimd_min_f32(temp, __builtin_shuffle(temp, (psimd_s32) { 1, 0, 3, 2 })); + } + + PSIMD_INTRINSIC float psimd_reduce_sum_f32(psimd_f32 v) { + const psimd_f32 result = psimd_allreduce_sum_f32(v); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_max_f32(psimd_f32 v) { + const psimd_f32 result = psimd_allreduce_max_f32(v); + return result[0]; + } + + PSIMD_INTRINSIC float psimd_reduce_min_f32(psimd_f32 v) { + const psimd_f32 result = psimd_allreduce_min_f32(v); + return result[0]; + } + #endif +#endif + +#endif /* PSIMD_H */ diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/qnnpack.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/qnnpack.h new file mode 100644 index 0000000000000000000000000000000000000000..591fa68eba5a3c8a6b22c12c4fa6efbefd098b84 --- /dev/null +++ 
b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/qnnpack.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <pthreadpool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Status code for any QNNPACK function call.
+ */
+enum qnnp_status {
+ /** The call succeeded, and all output arguments now contain valid data. */
+ qnnp_status_success = 0,
+ qnnp_status_uninitialized = 1,
+ qnnp_status_invalid_parameter = 2,
+ qnnp_status_unsupported_parameter = 3,
+ qnnp_status_unsupported_hardware = 4,
+ qnnp_status_out_of_memory = 5,
+};
+
+enum qnnp_status qnnp_initialize(void);
+
+enum qnnp_status qnnp_deinitialize(void);
+
+typedef struct qnnp_operator* qnnp_operator_t;
+
+enum qnnp_status qnnp_create_convolution2d_nhwc_q8(
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ uint8_t input_zero_point,
+ float input_scale,
+ uint8_t kernel_zero_point,
+ float kernel_scale,
+ const uint8_t* kernel,
+ const int32_t* bias,
+ uint8_t output_zero_point,
+ float output_scale,
+ uint8_t output_min,
+ uint8_t output_max,
+ uint32_t flags,
+ qnnp_operator_t* convolution);
+
+enum qnnp_status qnnp_setup_convolution2d_nhwc_q8(
+ qnnp_operator_t convolution,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const uint8_t* input,
+ size_t input_stride,
+ uint8_t* output,
+ size_t output_stride,
+ pthreadpool_t threadpool);
+
+enum qnnp_status qnnp_create_deconvolution2d_nhwc_q8(
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t adjustment_height,
+ uint32_t adjustment_width,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t stride_height,
+ uint32_t stride_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ uint8_t input_zero_point,
+ float input_scale,
+ uint8_t kernel_zero_point,
+ float kernel_scale,
+ const uint8_t* kernel,
+ const int32_t* bias,
+ uint8_t output_zero_point,
+ float output_scale,
+ uint8_t output_min,
+ uint8_t output_max,
+ uint32_t flags,
+ qnnp_operator_t* deconvolution);
+
+enum qnnp_status qnnp_setup_deconvolution2d_nhwc_q8(
+ qnnp_operator_t deconvolution,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ const uint8_t* input,
+ size_t input_stride,
+ uint8_t* output,
+ size_t output_stride,
+ pthreadpool_t threadpool);
+
+enum qnnp_status qnnp_create_fully_connected_nc_q8(
+ size_t input_channels,
+ size_t output_channels,
+ uint8_t input_zero_point,
+ float input_scale,
+ uint8_t kernel_zero_point,
+ float kernel_scale,
+ const uint8_t* kernel,
+ const int32_t* bias,
+ uint8_t output_zero_point,
+ float output_scale,
+ uint8_t output_min,
+ uint8_t output_max,
+ uint32_t flags,
+ qnnp_operator_t* fully_connected);
+
+enum qnnp_status qnnp_setup_fully_connected_nc_q8(
+ qnnp_operator_t fully_connected,
+ size_t batch_size,
+ const uint8_t* input,
+ size_t
input_stride, + uint8_t* output, + size_t output_stride); + +enum qnnp_status qnnp_create_global_average_pooling_nwc_q8( + size_t channels, + uint8_t input_zero_point, + float input_scale, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + qnnp_operator_t* global_average_pooling); + +enum qnnp_status qnnp_setup_global_average_pooling_nwc_q8( + qnnp_operator_t global_average_pooling, + size_t batch_size, + size_t width, + const uint8_t* input, + size_t input_stride, + uint8_t* output, + size_t output_stride); + +enum qnnp_status qnnp_create_average_pooling2d_nhwc_q8( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t stride_height, + uint32_t stride_width, + size_t channels, + uint8_t input_zero_point, + float input_scale, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + qnnp_operator_t* average_pooling); + +enum qnnp_status qnnp_setup_average_pooling2d_nhwc_q8( + qnnp_operator_t average_pooling, + size_t batch_size, + size_t input_height, + size_t input_width, + const uint8_t* input, + size_t input_stride, + uint8_t* output, + size_t output_stride, + pthreadpool_t threadpool); + +enum qnnp_status qnnp_create_max_pooling2d_nhwc_u8( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t stride_height, + uint32_t stride_width, + uint32_t dilation_height, + uint32_t dilation_width, + size_t channels, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + qnnp_operator_t* max_pooling); + +enum qnnp_status qnnp_setup_max_pooling2d_nhwc_u8( + qnnp_operator_t max_pooling, + size_t batch_size, + size_t input_height, + size_t input_width, + const uint8_t* input, + size_t input_stride, + uint8_t* output, + size_t output_stride, + pthreadpool_t threadpool); + +enum qnnp_status qnnp_create_channel_shuffle_nc_x8( + size_t groups, + size_t group_channels, + uint32_t flags, + qnnp_operator_t* channel_shuffle); + +enum qnnp_status qnnp_setup_channel_shuffle_nc_x8( + qnnp_operator_t channel_shuffle, + size_t batch_size, + const uint8_t* input, + size_t input_stride, + uint8_t* output, + size_t output_stride); + +enum qnnp_status qnnp_create_add_nc_q8( + size_t channels, + uint8_t a_zero_point, + float a_scale, + uint8_t b_zero_point, + float b_scale, + uint8_t sum_zero_point, + float sum_scale, + uint8_t sum_min, + uint8_t sum_max, + uint32_t flags, + qnnp_operator_t* add); + +enum qnnp_status qnnp_setup_add_nc_q8( + qnnp_operator_t add, + size_t batch_size, + const uint8_t* a, + size_t a_stride, + const uint8_t* b, + size_t b_stride, + uint8_t* sum, + size_t sum_stride); + +enum qnnp_status qnnp_create_clamp_nc_u8( + size_t channels, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + qnnp_operator_t* clamp); + +enum qnnp_status qnnp_setup_clamp_nc_u8( + qnnp_operator_t clamp, + size_t batch_size, + const uint8_t* input, + size_t input_stride, + uint8_t* output, + size_t output_stride); + +enum qnnp_status qnnp_create_sigmoid_nc_q8( + size_t channels, + uint8_t input_zero_point, + float input_scale, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + qnnp_operator_t* sigmoid); + +enum qnnp_status qnnp_setup_sigmoid_nc_q8( + 
qnnp_operator_t sigmoid,
+ size_t batch_size,
+ const uint8_t* input,
+ size_t input_stride,
+ uint8_t* output,
+ size_t output_stride);
+
+enum qnnp_status qnnp_create_leaky_relu_nc_q8(
+ size_t channels,
+ float negative_slope,
+ uint8_t input_zero_point,
+ float input_scale,
+ uint8_t output_zero_point,
+ float output_scale,
+ uint8_t output_min,
+ uint8_t output_max,
+ uint32_t flags,
+ qnnp_operator_t* leaky_relu);
+
+enum qnnp_status qnnp_setup_leaky_relu_nc_q8(
+ qnnp_operator_t leaky_relu,
+ size_t batch_size,
+ const uint8_t* input,
+ size_t input_stride,
+ uint8_t* output,
+ size_t output_stride);
+
+enum qnnp_status qnnp_create_softargmax_nc_q8(
+ size_t channels,
+ float input_scale,
+ uint8_t output_zero_point,
+ float output_scale,
+ uint32_t flags,
+ qnnp_operator_t* softargmax);
+
+enum qnnp_status qnnp_setup_softargmax_nc_q8(
+ qnnp_operator_t softargmax,
+ size_t batch_size,
+ const uint8_t* input,
+ size_t input_stride,
+ uint8_t* output,
+ size_t output_stride);
+
+enum qnnp_status qnnp_run_operator(
+ qnnp_operator_t op,
+ pthreadpool_t threadpool);
+
+enum qnnp_status qnnp_delete_operator(
+ qnnp_operator_t op);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/xnnpack.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/xnnpack.h
new file mode 100644
index 0000000000000000000000000000000000000000..e71be0fd57ffc1ef2cc67b2fc8fb20fc4288a1d2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/xnnpack.h
@@ -0,0 +1,6172 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <pthreadpool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// The number of bytes XNNPACK may read beyond array bounds.
+/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
+///
+/// Note: XNNPACK reads, but never writes beyond array bounds.
+#define XNN_EXTRA_BYTES 16
+
+/// Maximum number of dimensions in tensor shape.
+#define XNN_MAX_TENSOR_DIMS 6
+
+/// Allow sparse inference in a Runtime.
+///
+/// Note: this flag hints XNNPACK to consider sparse inference, but does not guarantee it.
+#define XNN_FLAG_HINT_SPARSE_INFERENCE 0x00000001
+
+/// Allow IEEE FP16 inference in a Runtime.
+///
+/// Note: this flag hints XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
+#define XNN_FLAG_HINT_FP16_INFERENCE 0x00000002
+
+/// Force IEEE FP16 inference in a Runtime, and fail if FP16 inference is not possible.
+///
+/// Note: this flag guarantees that XNNPACK will use IEEE FP16 inference, or fail to create the Runtime object.
+/// Warning: on x86 systems FP16 computations will be emulated at a substantial performance cost.
+#define XNN_FLAG_FORCE_FP16_INFERENCE 0x00000004
+
+/// Enable timing of each operator's runtime.
+#define XNN_FLAG_BASIC_PROFILING 0x00000008
+
+/// Enable the just-in-time compiler.
+#define XNN_FLAG_JIT 0x00000010
+
+/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
+#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001
+
+/// Assume transposed weights in a fully connected operator.
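+/// Editorial illustration (not part of the original header): for a fully connected operator with 3 input channels
+/// and 2 output channels, the filter is expected in [output_channels, input_channels] = [2, 3] layout by default,
+/// and in [input_channels, output_channels] = [3, 2] layout when this flag is specified; see
+/// xnn_define_fully_connected below.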
+#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001 + +/// The operator assumes NHWC layout for the input, regardless of the output layout. +#define XNN_FLAG_INPUT_NHWC 0x00000002 + +/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size. +#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004 + +/// Assume transposed weights in a batch matrix multiply operator. +#define XNN_FLAG_TRANSPOSE_B XNN_FLAG_TRANSPOSE_WEIGHTS + +/// Assume transposed input in a batch matrix multiply operator. +#define XNN_FLAG_TRANSPOSE_A 0x00000002 + +/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor. +#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004 + +/// Match behaviour of TensorFlow 1.x. +#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004 + +/// Static weights of the FP16 operator are in FP32 format. +#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008 + +/// Align corners of input and output images in resize operations. +#define XNN_FLAG_ALIGN_CORNERS 0x00000008 + +/// Yield worker threads of the thread pool to the system scheduler after the inference. +#define XNN_FLAG_YIELD_WORKERS 0x00000010 + +/// Use transient indirection buffer to reduce memory footprint +#define XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER 0x00000020 + +/// Reduce the dimensions. +#define XNN_FLAG_REDUCE_DIMS 0x00000040 + +/// The number of entries in an array of xnn_dynamic_quantization_params that XNNPACK may read beyond array bounds. +/// The caller must allocate at least this many extra xnn_dynamic_quantization_params before passing the array to XNNPACK. +/// +/// Note: XNNPACK reads, but never writes beyond array bounds. +#define XNN_EXTRA_QUANTIZATION_PARAMS 8 + +struct xnn_dynamic_quantization_params { + int32_t zero_point; + float scale; +}; + +/// Status code for any XNNPACK function call. +enum xnn_status { + /// The call succeeded, and all output arguments now contain valid data. + xnn_status_success = 0, + xnn_status_uninitialized = 1, + xnn_status_invalid_parameter = 2, + xnn_status_invalid_state = 3, + xnn_status_unsupported_parameter = 4, + xnn_status_unsupported_hardware = 5, + xnn_status_out_of_memory = 6, + xnn_status_reallocation_required = 7, +}; + +struct xnn_allocator { + /// User-specified pointer that will be passed as-is to all functions in this structure. + void* context; + /// Pointer to a function to be called for general memory allocation. + /// + /// @param context - The user-specified pointer from xnn_allocator structure. + /// @param size - The size of the memory block to allocate, in bytes. + /// + /// @returns Pointer to the allocated memory block of at least @ref size bytes. + /// If allocation fails, the function must return NULL. + void* (*allocate)(void* context, size_t size); + /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously + /// allocated memory block. The content of the old memory block is copied to the new memory block. + /// + /// @param context - The user-specified pointer from xnn_allocator structure. + /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL. + /// If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call. + /// @param size - The new size of the memory block to allocate, in bytes. + /// + /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous + /// memory block. 
+ /// If allocation fails, the function must return NULL, but must not release the previous memory block.
+ void* (*reallocate)(void* context, void* pointer, size_t size);
+ /// Pointer to a function to be called for general memory de-allocation.
+ ///
+ /// @param context - The user-specified pointer from xnn_allocator structure.
+ /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
+ /// If the pointer is NULL, the @ref deallocate call is a no-op.
+ void (*deallocate)(void* context, void* pointer);
+ /// Pointer to a function to be called for aligned memory allocation.
+ ///
+ /// @param context - The user-specified pointer from xnn_allocator structure.
+ /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
+ /// @param size - The size of the memory block to allocate, in bytes.
+ ///
+ /// @returns Pointer to the allocated memory block of at least @ref size bytes.
+ /// If allocation fails, the function must return NULL.
+ void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
+ /// Pointer to a function to be called for aligned memory de-allocation.
+ ///
+ /// @param context - The user-specified pointer from xnn_allocator structure.
+ /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
+ /// If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
+ void (*aligned_deallocate)(void* context, void* pointer);
+};
+
+/// Initialize XNNPACK library.
+///
+/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
+/// depending on the host processor. Initialization can be time-consuming.
+///
+/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
+/// If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
+/// will be used.
+///
+/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
+/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
+/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
+/// minimum hardware requirements for XNNPACK. E.g. this may happen on x86
+/// processors without SSE2 extension, or on 32-bit ARM processors without
+/// the NEON SIMD extension.
+enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
+
+/// Deinitialize XNNPACK library.
+///
+/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
+///
+/// @retval xnn_status_success - deinitialization call succeeded.
+enum xnn_status xnn_deinitialize(void);
+
+/// Subgraph is an abstract representation of a neural network model.
+/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
+typedef struct xnn_subgraph* xnn_subgraph_t;
+
+/// Create an empty Subgraph object.
+///
+/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
+/// The Subgraph object would avoid creating internal Value IDs in the
+/// [0, external_value_ids-1] range.
+/// @param flags - binary features of the subgraph. No supported flags are currently defined.
+/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
+/// successful return.
+enum xnn_status xnn_create_subgraph(
+ uint32_t external_value_ids,
+ uint32_t flags,
+ xnn_subgraph_t* subgraph_out);
+
+/// Destroy a Subgraph object, as well as the Values and Nodes associated with the subgraph.
+///
+/// @param subgraph - the Subgraph object to destroy.
+enum xnn_status xnn_delete_subgraph(
+ xnn_subgraph_t subgraph);
+
+#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
+#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
+#define XNN_VALUE_FLAG_PERSISTENT 0x00000004
+
+#define XNN_INVALID_VALUE_ID UINT32_MAX
+
+/// Type of elements in a Value object.
+enum xnn_datatype {
+ /// Invalid data type. Valid Values never have this datatype.
+ xnn_datatype_invalid = 0,
+ /// IEEE754 single-precision floating-point.
+ xnn_datatype_fp32 = 1,
+ /// IEEE754 half-precision floating-point.
+ xnn_datatype_fp16 = 2,
+ /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
+ xnn_datatype_qint8 = 3,
+ /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
+ xnn_datatype_quint8 = 4,
+ /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
+ xnn_datatype_qint32 = 5,
+ /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
+ xnn_datatype_qcint8 = 6,
+ /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
+ xnn_datatype_qcint32 = 7,
+ /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
+ xnn_datatype_qcint4 = 8,
+ /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
+ xnn_datatype_qdint8 = 9,
+};
+
+/// Define a tensor-type Value and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Value.
+/// @param datatype - type of the tensor elements.
+/// @param num_dims - number of dimensions in the shape.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
+/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
+/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
+/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at
+/// Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
+/// created for the Value.
+/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
+/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
+/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
+/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
+enum xnn_status xnn_define_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ size_t num_dims,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
+/// Define a quantized tensor-type Value and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Value.
+/// @param datatype - type of the tensor elements.
+/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
+/// @param scale - multiplication factor to convert quantized elements to real representation.
+/// @param num_dims - number of dimensions in the shape.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
+/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
+/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
+/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at
+/// Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
+/// created for the Value.
+/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
+/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
+/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
+/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
+enum xnn_status xnn_define_quantized_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ int32_t zero_point,
+ float scale,
+ size_t num_dims,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
+enum xnn_status xnn_define_channelwise_quantized_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ const float* scale,
+ size_t num_dims,
+ size_t channel_dim,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
+/// Validate the dimensions, zero point, datatype, and scale of a quantized tensor-type.
+///
+/// @param datatype - type of the tensor elements.
+/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
+/// @param scale - multiplication factor to convert quantized elements to real representation.
+/// @param num_dims - number of dimensions in the shape.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+enum xnn_status xnn_validate_quantized_tensor(
+ enum xnn_datatype datatype,
+ int32_t zero_point,
+ float scale,
+ size_t num_dims,
+ const size_t* dims);
+
+/// Validate the dimensions, channel_dim, zero point, datatype, and scales of a channelwise quantized tensor-type.
+///
+/// @param datatype - type of the tensor elements.
+/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
+/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
+/// @param num_dims - number of dimensions in the shape.
+/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
+/// Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
+/// Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
+/// the Depthwise Convolution operators.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+enum xnn_status xnn_validate_channelwise_quantized_tensor(
+ enum xnn_datatype datatype,
+ int32_t zero_point,
+ const float* scale,
+ size_t num_dims,
+ size_t channel_dim,
+ const size_t* dims);
+
+/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Value.
+/// @param datatype - type of the tensor elements.
+/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
+/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
+/// @param num_dims - number of dimensions in the shape.
+/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
+/// Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
+/// Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
+/// the Depthwise Convolution operators.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
+/// this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
+/// of the Subgraph object, and of any Runtime objects created from the Subgraph.
+/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at
+/// Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
+/// created for the Value.
+/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
+/// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
+/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
+/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
+enum xnn_status xnn_define_channelwise_quantized_tensor_value_v2(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ int32_t zero_point,
+ const float* scale,
+ size_t num_dims,
+ size_t channel_dim,
+ const size_t* dims,
+ const void* data,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
+/// Define a dynamically quantized tensor-type Value and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Value.
+/// @param datatype - type of the tensor elements.
+/// @param num_dims - number of dimensions in the shape.
+/// @param num_nonbatch_dims - number of non-batch dimensions in the shape. The leading (num_dims - num_nonbatch_dims)
+/// dimensions will be flattened and treated as batch size. A set of quantization parameters
+/// will be calculated for each batch element.
+/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
+/// XNNPACK does not keep any pointers to this array after the function returns.
+/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at
+/// Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
+/// created for the Value.
+/// @param flags - binary features of the Value. No supported flags are currently defined.
+/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
+/// valid @a external_id was provided, the variable will be initialized with the @a external_id value.
+enum xnn_status xnn_define_dynamically_quantized_tensor_value(
+ xnn_subgraph_t subgraph,
+ enum xnn_datatype datatype,
+ size_t num_dims,
+ size_t num_nonbatch_dims,
+ const size_t* dims,
+ uint32_t external_id,
+ uint32_t flags,
+ uint32_t* id_out);
+
+/// Define a Convert Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Convert Node. No supported flags are currently defined.
+enum xnn_status xnn_define_convert(
+ xnn_subgraph_t subgraph,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Convolution Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
+/// flag is specified.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param kernel_height - kernel (filter) height.
+/// @param kernel_width - kernel (filter) width.
+/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
+/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
+/// @param dilation_height - dilation of kernel elements along the height dimension.
+/// @param dilation_width - dilation of kernel elements along the width dimension.
+/// @param groups - number of convolution groups.
+/// @param group_input_channels - number of input channels per group.
+/// @param group_output_channels - number of output channels per group.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, groups * group_input_channels] dimensions.
+/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
+/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
+/// dimensions.
+/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Convolution Node without a bias. If
+/// present, the bias tensor must be a 1D tensor defined in the @a subgraph with [groups *
+/// group_output_channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, groups * group_output_channels] dimensions.
+/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
+enum xnn_status xnn_define_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param padding_top - implicit padding above 2D output data.
+/// @param padding_right - implicit padding to the right of 2D output data.
+/// @param padding_bottom - implicit padding below 2D output data.
+/// @param padding_left - implicit padding to the left of 2D output data.
+/// @param adjustment_height - additional elements in the bottom of the 2D output data.
+/// @param adjustment_width - additional elements to the right of the 2D output data.
+/// @param kernel_height - kernel (filter) height.
+/// @param kernel_width - kernel (filter) width.
+/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
+/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
+/// @param dilation_height - dilation of kernel elements along the height dimension.
+/// @param dilation_width - dilation of kernel elements along the width dimension.
+/// @param groups - number of convolution groups.
+/// @param group_input_channels - number of input channels per group.
+/// @param group_output_channels - number of output channels per group.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, groups * group_input_channels] dimensions.
+/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
+/// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
+/// dimensions.
+/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias.
+/// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
+/// [groups * group_output_channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, groups * group_output_channels] dimensions.
+/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
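+///
+/// Usage sketch (editorial, identifiers hypothetical): assuming Value IDs in_id, w_id, b_id and out_id are already
+/// defined in the subgraph, a 3x3, stride-2 deconvolution with no extra padding or output adjustment could be
+/// added as:
+///
+///   xnn_define_deconvolution_2d(subgraph,
+///       /*padding=*/0, 0, 0, 0, /*adjustment=*/0, 0,
+///       /*kernel=*/3, 3, /*upsampling=*/2, 2, /*dilation=*/1, 1,
+///       /*groups=*/1, group_input_channels, group_output_channels,
+///       /*output_min=*/-INFINITY, /*output_max=*/+INFINITY,
+///       in_id, w_id, b_id, out_id, /*flags=*/0);
+///
+/// where the infinite clipping bounds (INFINITY from <math.h>) leave the output unclamped.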
+enum xnn_status xnn_define_deconvolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t padding_top,
+ uint32_t padding_right,
+ uint32_t padding_bottom,
+ uint32_t padding_left,
+ uint32_t adjustment_height,
+ uint32_t adjustment_width,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t upsampling_height,
+ uint32_t upsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t groups,
+ size_t group_input_channels,
+ size_t group_output_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
+/// flag is specified.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param kernel_height - kernel (filter) height.
+/// @param kernel_width - kernel (filter) width.
+/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
+/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
+/// @param dilation_height - dilation of kernel elements along the height dimension.
+/// @param dilation_width - dilation of kernel elements along the width dimension.
+/// @param depth_multiplier - ratio of output channels to input channels.
+/// @param input_channels - number of input channels.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, input_channels] dimensions.
+/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
+/// with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
+/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
+/// a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
+/// [input_channels * depth_multiplier] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, input_channels * depth_multiplier] dimensions.
+/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
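+///
+/// Worked example (editorial): with input_channels = 32 and depth_multiplier = 1, a 3x3 depthwise convolution takes
+/// a [1, 3, 3, 32 * 1] = [1, 3, 3, 32] filter and produces input_channels * depth_multiplier = 32 output channels,
+/// i.e. exactly one output channel per input channel.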
+enum xnn_status xnn_define_depthwise_convolution_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t input_padding_top,
+ uint32_t input_padding_right,
+ uint32_t input_padding_bottom,
+ uint32_t input_padding_left,
+ uint32_t kernel_height,
+ uint32_t kernel_width,
+ uint32_t subsampling_height,
+ uint32_t subsampling_width,
+ uint32_t dilation_height,
+ uint32_t dilation_width,
+ uint32_t depth_multiplier,
+ size_t input_channels,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t filter_id,
+ uint32_t bias_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a Depth To Space 2D Node and add it to a Subgraph.
+///
+/// The Depth To Space 2D Node rearranges data from depth into blocks of spatial data (a reverse transform to
+/// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
+/// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
+/// smaller than that of the input.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param block_size - the size of the spatial block.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, OC * block_size * block_size] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH * block_size, IW * block_size, OC] dimensions.
+/// @param flags - binary features of the Depth To Space 2D Node. No supported flags are currently defined.
+enum xnn_status xnn_define_depth_to_space_2d(
+ xnn_subgraph_t subgraph,
+ uint32_t block_size,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+enum xnn_status xnn_define_depth_to_space(
+ xnn_subgraph_t subgraph,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t block_size,
+ uint32_t flags);
+
+/// Define a 1D Global Average Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 2 or more dimensions
+/// defined in the @a subgraph. Averaging is performed across the second-innermost dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 2 or more
+/// dimensions defined in the @a subgraph.
+/// @param flags - binary features of the 1D Global Average Pooling Node. The only currently supported value is
+/// XNN_FLAG_REDUCE_DIMS.
+enum xnn_status xnn_define_global_average_pooling_1d(
+ xnn_subgraph_t subgraph,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Global Average Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 3 or more dimensions
+/// defined in the @a subgraph. Averaging is performed across the second- and third-innermost
+/// dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 3 or more
+/// dimensions defined in the @a subgraph.
+/// @param flags - binary features of the 2D Global Average Pooling Node. The only currently supported value is
+/// XNN_FLAG_REDUCE_DIMS.
+enum xnn_status xnn_define_global_average_pooling_2d(
+ xnn_subgraph_t subgraph,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 1D Global Sum Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 2 or more dimensions
+/// defined in the @a subgraph. Summation is performed across the second-innermost dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 2 or more
+/// dimensions defined in the @a subgraph.
+/// @param flags - binary features of the 1D Global Sum Pooling Node. The only currently supported value is
+/// XNN_FLAG_REDUCE_DIMS.
+enum xnn_status xnn_define_global_sum_pooling_1d(
+ xnn_subgraph_t subgraph,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Global Sum Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 3 or more dimensions
+/// defined in the @a subgraph. Summation is performed across the second- and third-innermost
+/// dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 3 or more
+/// dimensions defined in the @a subgraph.
+/// @param flags - binary features of the 2D Global Sum Pooling Node. The only currently supported value is
+/// XNN_FLAG_REDUCE_DIMS.
+enum xnn_status xnn_define_global_sum_pooling_2d(
+ xnn_subgraph_t subgraph,
+ float output_min,
+ float output_max,
+ uint32_t input_id,
+ uint32_t output_id,
+ uint32_t flags);
+
+/// Define a 2D Average Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
+/// flag is specified.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param pooling_height - pooling (kernel) height.
+/// @param pooling_width - pooling (kernel) width.
+/// @param stride_height - displacement of the pooling window in the vertical dimension of the input pixels
+/// corresponding to vertically adjacent output pixels.
+/// @param stride_width - displacement of the pooling window in the horizontal dimension of the input pixels
+/// corresponding to horizontally adjacent output pixels.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, channels] dimensions.
+/// @param flags - binary features of the 2D Average Pooling Node. The only currently supported value is
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
+enum xnn_status xnn_define_average_pooling_2d(
+  xnn_subgraph_t subgraph,
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  float output_min,
+  float output_max,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Fully Connected Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
+/// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
+/// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if
+/// input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
+/// divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
+/// [num_input_elements] dimensions, then reshaped into a 2D tensor of
+/// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
+/// total number of elements in the input tensor.
+/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
+/// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
+/// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
+/// specified, the filter tensor must have [input_channels, output_channels] dimensions.
+/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
+/// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
+/// dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
+/// dimensionality as the input tensor, all its dimensions but the last one must match the
+/// corresponding dimensions of the input tensor, and the last dimension of the output tensor must
+/// match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
+/// must be a 2D tensor of [batch_size, output_channels] dimensions.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
+/// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
+/// total number of elements in the input tensor.
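+/// As a worked example of this arithmetic (illustrative only): a [2, 3, 4] input with
+/// input_channels = 4 has 24 elements, so with XNN_FLAG_TENSORFLOW_RESHAPE_2D it is reshaped
+/// to [6, 4] and the output is a [6, output_channels] tensor.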
+/// @param flags - binary features of the Fully Connected Node. The only currently supported values are
+/// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
+enum xnn_status xnn_define_fully_connected(
+  xnn_subgraph_t subgraph,
+  float output_min,
+  float output_max,
+  uint32_t input_id,
+  uint32_t filter_id,
+  uint32_t bias_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Sparse Fully Connected Node and add it to a Subgraph.
+///
+/// This operator is experimental, and will be removed in the future.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
+/// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
+/// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if
+/// input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
+/// divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
+/// [num_input_elements] dimensions, then reshaped into a 2D tensor of
+/// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
+/// total number of elements in the input tensor.
+/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
+/// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
+/// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
+/// specified, the filter tensor must have [input_channels, output_channels] dimensions.
+/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Sparse Fully Connected Node without a
+/// bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
+/// [output_channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
+/// dimensionality as the input tensor, all its dimensions but the last one must match the
+/// corresponding dimensions of the input tensor, and the last dimension of the output tensor must
+/// match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
+/// must be a 2D tensor of [batch_size, output_channels] dimensions.
+/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
+/// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
+/// total number of elements in the input tensor.
+/// @param flags - binary features of the Sparse Fully Connected Node. The only currently supported values are
+/// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
+enum xnn_status xnn_define_fully_connected_sparse(
+  xnn_subgraph_t subgraph,
+  float output_min,
+  float output_max,
+  uint32_t input_id,
+  uint32_t filter_id,
+  uint32_t bias_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2D Max Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
+/// flag is specified.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
+/// @param pooling_height - pooling (kernel) height.
+/// @param pooling_width - pooling (kernel) width.
+/// @param stride_height - displacement of the pooling window in the vertical dimension of the input pixels
+/// corresponding to vertically adjacent output pixels.
+/// @param stride_width - displacement of the pooling window in the horizontal dimension of the input pixels
+/// corresponding to horizontally adjacent output pixels.
+/// @param dilation_height - dilation of pooling elements along the height dimension.
+/// @param dilation_width - dilation of pooling elements along the width dimension.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, channels] dimensions.
+/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
+/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
+enum xnn_status xnn_define_max_pooling_2d(
+  xnn_subgraph_t subgraph,
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  float output_min,
+  float output_max,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_padding_top - implicit zero-padding above 2D input data.
+/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
+/// @param input_padding_bottom - implicit zero-padding below 2D input data.
+/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
+/// @param pooling_height - pooling (kernel) height. The vertical stride between pooling regions matches this value.
+/// @param pooling_width - pooling (kernel) width. The horizontal stride between pooling regions matches this value.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, channels] dimensions.
+/// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
+/// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
+/// @param output_index_id - Value ID for the output tensor with the indices of the maximum values in the pools.
+/// The output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
+/// dimensions.
+/// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
+enum xnn_status xnn_define_argmax_pooling_2d(
+  xnn_subgraph_t subgraph,
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t input_id,
+  uint32_t output_value_id,
+  uint32_t output_index_id,
+  uint32_t flags);
+
+/// Define a 2D UnPooling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param padding_top - implicit padding above 2D output data.
+/// @param padding_right - implicit padding to the right of 2D output data.
+/// @param padding_bottom - implicit padding below 2D output data.
+/// @param padding_left - implicit padding to the left of 2D output data.
+/// @param pooling_height - height of the pooling window.
+/// @param pooling_width - width of the pooling window.
+/// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
+/// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
+/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
+/// a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with
+/// [N, IH, IW, channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, OH, OW, channels] dimensions.
+/// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
+enum xnn_status xnn_define_unpooling_2d(
+  xnn_subgraph_t subgraph,
+  uint32_t padding_top,
+  uint32_t padding_right,
+  uint32_t padding_bottom,
+  uint32_t padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t input_value_id,
+  uint32_t input_index_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2-Input Add Node and add it to a Subgraph.
+///
+/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
+/// of the two inputs.
+/// @param flags - binary features of the Add Node. No supported flags are currently defined.
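+///
+/// A minimal usage sketch (illustrative only; assumes a_id, b_id, and sum_id were previously
+/// created in the @a subgraph, e.g. via xnn_define_tensor_value, and <math.h> is included):
+/// @code
+///   enum xnn_status status = xnn_define_add2(
+///       subgraph, /*output_min=*/-INFINITY, /*output_max=*/+INFINITY,
+///       a_id, b_id, sum_id, /*flags=*/0);
+/// @endcode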
+enum xnn_status xnn_define_add2(
+  xnn_subgraph_t subgraph,
+  float output_min,
+  float output_max,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2-Input Multiply Node and add it to a Subgraph.
+///
+/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
+/// of the two inputs.
+/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
+enum xnn_status xnn_define_multiply2(
+  xnn_subgraph_t subgraph,
+  float output_min,
+  float output_max,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+// Cap operations applied to logits (Q * K) of attention operator.
+enum xnn_attention_logits_cap_type {
+  // No capping.
+  xnn_attention_logits_cap_type_none = 0,
+  // Cap the absolute values of logits by tanh: tanh(logits / cap) * cap
+  xnn_attention_logits_cap_type_tanh
+};
+
+// Params when the cap type is xnn_attention_logits_cap_type_tanh.
+struct xnn_attention_logits_cap_tanh_params {
+  float cap;
+};
+
+/// Define a Scaled Dot-Product Attention Node and add it to a Subgraph.
+///
+/// This operator is experimental.
+///
+/// The Scaled Dot-Product Attention Node computes a multi-head or multi-query scaled dot-product attention on the
+/// query, key, and value tensors.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param cap_type - type of cap to be applied to the logits.
+/// @param cap_params - parameters for the cap. Must be a pointer to xnn_attention_logits_cap_tanh_params if cap_type
+/// is xnn_attention_logits_cap_type_tanh.
+/// @param query_id - Value ID for the query tensor. The query tensor must be a 3+-dimensional tensor defined in the
+/// @a subgraph with dimensions [*, H, T, C], where H/T/C are the heads/tokens/channels, and * is
+/// the 0 or more dimensions treated as batch size.
+/// @param key_id - Value ID for the key tensor. The key tensor must be a 2+-dimensional tensor defined in the
+/// @a subgraph.
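+/// (For illustration only: with a query of shape [2, 8, 128, 64], i.e. batch 2, 8 heads,
+/// 128 tokens, 64 channels, a multi-head key could have shape [2, 8, 256, 64] and a
+/// multi-query key shape [2, 256, 64].)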
+/// The key tensor can have the same number of dimensions as the query, with dimensions
+/// [*, H, U, C] (multi-head), or one dimension fewer than the query, with dimensions
+/// [*, U, C] (multi-query; omitting the number of heads implies a single head), where H/U/C are the
+/// heads/key_value_tokens/channels, and * is the 0 or more dimensions treated as batch size. These
+/// batch size dimensions must be the same as query.
+/// @param value_id - Value ID for the value tensor. The value tensor must be a 2+-dimensional tensor defined in the
+/// @a subgraph. It can have the same number of dimensions as the query, with dimensions
+/// [*, H, U, D] (multi-head), or one dimension fewer than the query, with dimensions
+/// [*, U, D] (multi-query; omitting the number of heads implies a single head), where H/U/D are the
+/// heads/key_value_tokens/value_channels, and * is the 0 or more dimensions treated as batch size.
+/// These batch size dimensions must be the same as query and key.
+/// @param scale_id - Value ID for the scale tensor. The scale tensor must be a 1D tensor defined in the @a subgraph
+/// with [C] dimensions. The query tensor is multiplied with this scale tensor before the dot product
+/// with the key tensor.
+/// @param mask_id - Value ID for the mask tensor. The mask tensor must be a 2D tensor defined in the @a subgraph with
+/// [T, U] dimensions. The mask tensor is added to the logits (query dot key).
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 3+-dimensional tensor defined in the
+/// @a subgraph with dimensions [*, H, T, D], where H/T/D are the heads/tokens/value_channels,
+/// and * is the 0 or more dimensions treated as batch size. These batch size dimensions must be the
+/// same as query, key, and value.
+/// @param flags - binary features of the Scaled Dot-Product Attention Node. No supported flags are currently defined.
+enum xnn_status xnn_define_scaled_dot_product_attention(
+  xnn_subgraph_t subgraph,
+  enum xnn_attention_logits_cap_type cap_type,
+  const void* cap_params,
+  uint32_t query_id,
+  uint32_t key_id,
+  uint32_t value_id,
+  uint32_t scale_id,
+  uint32_t mask_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Subtract Node and add it to a Subgraph.
+///
+/// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
+/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
+/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
+/// that dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
+/// of the two inputs.
+/// @param flags - binary features of the Subtract Node. No supported flags are currently defined. +enum xnn_status xnn_define_subtract( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Divide Node and add it to a Subgraph. +/// +/// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param output_min - lower bound for clipping output values. +/// @param output_max - upper bound for clipping output values. +/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the second +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the first +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined +/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension +/// of the two inputs. +/// @param flags - binary features of the Divide Node. No supported flags are currently defined. +enum xnn_status xnn_define_divide( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + +/// Define a 2-Input Maximum Node and add it to a Subgraph. +/// +/// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the second +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the first +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined +/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension +/// of the two inputs. +/// @param flags - binary features of the Maximum Node. No supported flags are currently defined. +enum xnn_status xnn_define_maximum2( + xnn_subgraph_t subgraph, + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + +/// Define a 2-Input Minimum Node and add it to a Subgraph. +/// +/// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules. 
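+///
+/// A minimal usage sketch (illustrative only; assumes x_id, y_id, and min_id were previously
+/// created in the @a subgraph, e.g. via xnn_define_tensor_value):
+/// @code
+///   enum xnn_status status = xnn_define_minimum2(subgraph, x_id, y_id, min_id, /*flags=*/0);
+/// @endcode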
+/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the second +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the first +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined +/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension +/// of the two inputs. +/// @param flags - binary features of the Minimum Node. No supported flags are currently defined. +enum xnn_status xnn_define_minimum2( + xnn_subgraph_t subgraph, + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Squared Difference Node and add it to a Subgraph. +/// +/// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting +/// rules. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the second +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in +/// the @a subgraph with each dimension either equal to the corresponding dimension of the first +/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along +/// that dimension. +/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined +/// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension +/// of the two inputs. +/// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined. +enum xnn_status xnn_define_squared_difference( + xnn_subgraph_t subgraph, + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Constant Pad Node with static padding specification and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array +/// must have as many elements as the number of dimensions in the input tensor. +/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array +/// must have as many elements as the number of dimensions in the input tensor. +/// @param padding_value - constant value used to initialize padding elements. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. 
The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor with padding.
+/// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
+enum xnn_status xnn_define_static_constant_pad(
+  xnn_subgraph_t subgraph,
+  const size_t* pre_paddings,
+  const size_t* post_paddings,
+  float padding_value,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Mean Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param num_reduction_axes - number of axes along which mean is computed.
+/// @param reduction_axes - axes along which mean is computed.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with at least
+/// @a num_reduction_axes dimensions defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor defined in the
+/// @a subgraph with @a num_reduction_axes fewer dimensions than the input tensor (if
+/// XNN_FLAG_REDUCE_DIMS is specified), or with the same rank but with the dimensions at
+/// @a reduction_axes reduced to 1 (if XNN_FLAG_REDUCE_DIMS is not specified).
+/// @param flags - binary features of the Mean Node. The only currently supported value is XNN_FLAG_REDUCE_DIMS.
+enum xnn_status xnn_define_static_mean(
+  xnn_subgraph_t subgraph,
+  size_t num_reduction_axes,
+  const size_t* reduction_axes,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2-Input Concatenate Node and add it to a Subgraph.
+///
+/// The 2-Input Concatenate Node concatenates two tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param axis - the axis to concatenate the two input tensors along.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// second input.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// first input.
+/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the dimension of both inputs, except the axis
+/// dimension, where it is the sum of the corresponding dimensions of both inputs.
+/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
+enum xnn_status xnn_define_concatenate2(
+  xnn_subgraph_t subgraph,
+  size_t axis,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 3-Input Concatenate Node and add it to a Subgraph.
+///
+/// The 3-Input Concatenate Node concatenates three tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param axis - the axis to concatenate the three input tensors along.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param input2_id - Value ID for the second input tensor.
+/// The input tensor must be an N-dimensional tensor defined in the @a subgraph with each dimension,
+/// except the axis, equal to the corresponding dimension of the other inputs.
+/// @param input3_id - Value ID for the third input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis
+/// dimension, where it is the sum of the corresponding dimensions of all inputs.
+/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
+enum xnn_status xnn_define_concatenate3(
+  xnn_subgraph_t subgraph,
+  size_t axis,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t input3_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 4-Input Concatenate Node and add it to a Subgraph.
+///
+/// The 4-Input Concatenate Node concatenates four tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param axis - the axis to concatenate the four input tensors along.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param input3_id - Value ID for the third input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param input4_id - Value ID for the fourth input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
+/// other inputs.
+/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis
+/// dimension, where it is the sum of the corresponding dimensions of all inputs.
+/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
+enum xnn_status xnn_define_concatenate4(
+  xnn_subgraph_t subgraph,
+  size_t axis,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t input3_id,
+  uint32_t input4_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 5-Input Concatenate Node and add it to a Subgraph.
+///
+/// The 5-Input Concatenate Node concatenates five tensors along a specified axis; its parameters mirror those of
+/// xnn_define_concatenate4, with a fifth input added.
+enum xnn_status xnn_define_concatenate5(
+  xnn_subgraph_t subgraph,
+  size_t axis,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t input3_id,
+  uint32_t input4_id,
+  uint32_t input5_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Copy Node and add it to a Subgraph.
+///
+/// The Copy Node copies an input tensor to an output tensor.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor.
+/// The output tensor must be defined in the @a subgraph, and its shape must match the shape of the
+/// input tensor.
+/// @param flags - binary features of the Copy Node. No supported flags are currently defined.
+enum xnn_status xnn_define_copy(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a 2-Output Split Node and add it to a Subgraph.
+///
+/// The 2-Output Split Node evenly splits an input tensor into two output tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param split_dim - the dimension to split the input tensor along.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
+/// subgraph.
+/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
+/// of the second output. The split_dim dimension is half of the input's split_dim.
+/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the first output. The split_dim dimension is half of the input's split_dim.
+/// @param flags - binary features of the Split Node. No supported flags are currently defined.
+enum xnn_status xnn_define_even_split2(
+  xnn_subgraph_t subgraph,
+  size_t split_dim,
+  uint32_t input_id,
+  uint32_t output1_id,
+  uint32_t output2_id,
+  uint32_t flags);
+
+/// Define a 3-Output Split Node and add it to a Subgraph.
+///
+/// The 3-Output Split Node evenly splits an input tensor into three output tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param split_dim - the dimension to split the input tensor along.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
+/// subgraph.
+/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
+/// of the second and third output. The split_dim dimension is one third of the input's split_dim.
+/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the first and third output. The split_dim dimension is one third of the input's
+/// split_dim.
+/// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the first and second output. The split_dim dimension is one third of the input's
+/// split_dim.
+/// @param flags - binary features of the Split Node. No supported flags are currently defined.
+enum xnn_status xnn_define_even_split3(
+  xnn_subgraph_t subgraph,
+  size_t split_dim,
+  uint32_t input_id,
+  uint32_t output1_id,
+  uint32_t output2_id,
+  uint32_t output3_id,
+  uint32_t flags);
+
+/// Define a 4-Output Split Node and add it to a Subgraph.
+///
+/// The 4-Output Split Node evenly splits an input tensor into four output tensors along a specified axis.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param split_dim - the dimension to split the input tensor along.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
+/// subgraph.
+/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
+/// of the other output tensors. The split_dim dimension is one fourth of the input's split_dim.
+/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
+/// split_dim.
+/// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
+/// split_dim.
+/// @param output4_id - Value ID for the fourth output tensor. The output tensor must be an N-dimensional tensor
+/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
+/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
+/// split_dim.
+/// @param flags - binary features of the Split Node. No supported flags are currently defined.
+enum xnn_status xnn_define_even_split4(
+  xnn_subgraph_t subgraph,
+  size_t split_dim,
+  uint32_t input_id,
+  uint32_t output1_id,
+  uint32_t output2_id,
+  uint32_t output3_id,
+  uint32_t output4_id,
+  uint32_t flags);
+
+/// Define a Reshape Node with static shape specification and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param num_dims - number of shape dimensions in the output tensor.
+/// @param new_shape - shape dimensions of the output tensor.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, its
+/// shape must match @a new_shape, and its total number of elements must match that of the input
+/// tensor.
+/// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
+enum xnn_status xnn_define_static_reshape(
+  xnn_subgraph_t subgraph,
+  size_t num_dims,
+  const size_t* new_shape,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Node that reshapes a tensor to two dimensions, retaining the
+/// trailing dimension, and add it to a Subgraph.
+///
+/// This operator is experimental.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be
+/// defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be
+/// defined in the @a subgraph, and its total number of
+/// elements must match that of the input tensor.
+/// @param flags - binary features of the Reshape Node. No supported flags are
+/// currently defined.
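+/// For example (an illustrative sketch), a [2, 3, 4] input would be reshaped to
+/// [6, 4], retaining the trailing dimension:
+/// @code
+///   enum xnn_status status =
+///       xnn_define_reshape_2d(subgraph, input_id, output_id, /*flags=*/0);
+/// @endcode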
+enum xnn_status xnn_define_reshape_2d(xnn_subgraph_t subgraph,
+                                      uint32_t input_id, uint32_t output_id,
+                                      uint32_t flags);
+
+/// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param new_height - height dimension of the output tensor.
+/// @param new_width - width dimension of the output tensor.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, H, W, C] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, new_height, new_width, C] dimensions.
+/// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
+/// XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
+enum xnn_status xnn_define_static_resize_bilinear_2d(
+  xnn_subgraph_t subgraph,
+  size_t new_height,
+  size_t new_width,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, H, W, channels] dimensions.
+/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph with
+/// [channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, H, W, channels] dimensions.
+/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
+enum xnn_status xnn_define_prelu(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t slope_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a RoPE (Rotary Positional Embeddings) Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param max_sequence_size - maximum possible number of tokens (maximum sequence length) of the input/output tensors.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [batch, tokens, heads, channels] dimensions.
+/// @param weights_id - Value ID for the weights tensor. The weights tensor must be a 2D tensor defined in the
+/// @a subgraph with [max_sequence_size, channels] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [batch, tokens, heads, channels] dimensions.
+/// @param flags - binary features of the RoPE Node. No supported flags are currently defined.
+enum xnn_status xnn_define_rope(
+  xnn_subgraph_t subgraph,
+  size_t max_sequence_size,
+  uint32_t input_id,
+  uint32_t weights_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define an Abs Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Abs Node.
No supported flags are currently defined.
+enum xnn_status xnn_define_abs(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Bankers' Rounding Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
+enum xnn_status xnn_define_bankers_rounding(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Batch Matrix Multiply Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph. It must be at least 3D. The first N-2 dimensions must match the second input
+/// tensor. The last 2 dimensions are [M, K]. If XNN_FLAG_TRANSPOSE_B is not specified, the last
+/// dimension must match the second-to-last dimension of the second input tensor. If
+/// XNN_FLAG_TRANSPOSE_B is specified, the last dimension must match the last dimension of the
+/// second input tensor.
+/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined
+/// in the @a subgraph. It must be at least 3D. The first N-2 dimensions must match the first input
+/// tensor. If XNN_FLAG_TRANSPOSE_B is not specified, the last 2 dimensions are [K, N], and the
+/// second-to-last dimension must match the last dimension of the first input tensor. If
+/// XNN_FLAG_TRANSPOSE_B is specified, the last 2 dimensions are [N, K], and the last dimension must
+/// match the last dimension of the first input tensor.
+/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined in the
+/// @a subgraph. It must be at least 3D. The first N-2 dimensions must match the first and second
+/// input tensors. The last 2 dimensions must be [M, N].
+/// @param flags - binary features of the Batch Matrix Multiply Node. The only currently supported value is
+/// XNN_FLAG_TRANSPOSE_B.
+enum xnn_status xnn_define_batch_matrix_multiply(
+  xnn_subgraph_t subgraph,
+  uint32_t input1_id,
+  uint32_t input2_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Ceiling Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
+enum xnn_status xnn_define_ceiling(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Clamp Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param output_min - lower bound for clipping output values.
+/// @param output_max - upper bound for clipping output values.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the Clamp Node. No supported flags are currently defined. +enum xnn_status xnn_define_clamp( + xnn_subgraph_t subgraph, + float output_min, + float output_max, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param alpha - scale factor for negative output elements. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the ELU Node. No supported flags are currently defined. +enum xnn_status xnn_define_elu( + xnn_subgraph_t subgraph, + float alpha, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Floor Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the Floor Node. No supported flags are currently defined. +enum xnn_status xnn_define_floor( + xnn_subgraph_t subgraph, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define a HardSwish Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined. +enum xnn_status xnn_define_hardswish( + xnn_subgraph_t subgraph, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Leaky ReLU Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param negative_slope - scale factor for negative input elements. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined. +enum xnn_status xnn_define_leaky_relu( + xnn_subgraph_t subgraph, + float negative_slope, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Negate Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
+enum xnn_status xnn_define_negate(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Sigmoid Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
+enum xnn_status xnn_define_sigmoid(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a SoftMax Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
+/// least one dimension.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
+enum xnn_status xnn_define_softmax(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Space To Depth 2D Node and add it to a Subgraph.
+///
+/// The Space To Depth 2D Node rearranges blocks of spatial data into the channel dimension (the inverse transform to
+/// Depth To Space 2D). For each output pixel, the values of an input square of pixels with side @a block_size are
+/// gathered into its channels. The output depth is therefore @a block_size x @a block_size times greater than that of
+/// the input.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param block_size - the size of the spatial block.
+/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH * block_size, IW * block_size, OC] dimensions.
+/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
+/// with [N, IH, IW, OC * block_size * block_size] dimensions.
+/// @param flags - binary features of the Space To Depth 2D Node. No supported flags are currently defined.
+enum xnn_status xnn_define_space_to_depth_2d(
+  xnn_subgraph_t subgraph,
+  uint32_t block_size,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Square Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Square Node. No supported flags are currently defined.
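+///
+/// A minimal usage sketch of this common unary-node pattern (illustrative only; assumes
+/// input_id and output_id were previously created in the @a subgraph):
+/// @code
+///   enum xnn_status status = xnn_define_square(subgraph, input_id, output_id, /*flags=*/0);
+/// @endcode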
+enum xnn_status xnn_define_square(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Square Root Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// shape must match the shape of the input tensor.
+/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
+enum xnn_status xnn_define_square_root(
+  xnn_subgraph_t subgraph,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Reciprocal Square Root Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be
+/// defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be
+/// defined in the @a subgraph, and its shape must match the
+/// shape of the input tensor.
+/// @param flags - binary features of the Reciprocal Square Root Node. No
+/// supported flags are currently defined.
+enum xnn_status xnn_define_reciprocal_square_root(xnn_subgraph_t subgraph,
+                                                  uint32_t input_id,
+                                                  uint32_t output_id,
+                                                  uint32_t flags);
+
+/// Define a Static Slice Node and add it to a Subgraph.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param num_dims - number of shape dimensions in the input and output tensor.
+/// @param offsets - offsets in each dimension of the input tensor. This array must have @a num_dims elements.
+/// @param sizes - size of each dimension in the output tensor. This array must have @a num_dims elements.
+/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
+/// dimensions must match @a sizes.
+/// @param flags - binary features of the Static Slice Node. No supported flags are currently defined.
+enum xnn_status xnn_define_static_slice(
+  xnn_subgraph_t subgraph,
+  size_t num_dims,
+  const size_t* offsets,
+  const size_t* sizes,
+  uint32_t input_id,
+  uint32_t output_id,
+  uint32_t flags);
+
+/// Define a Static Transpose Node and add it to a Subgraph.
+///
+/// The Static Transpose Node applies a generalized transpose to the input tensor using the permutation in @a perm.
+///
+/// @param subgraph - a Subgraph object that will own the created Node.
+/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in
+/// the @a subgraph.
+/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
+/// in the @a subgraph with each dimension equal to its corresponding permuted input dimension.
+/// @param num_dims - the number of permutation dimensions. This must be equal to the number of input dimensions.
+/// @param perm - the permutation of the axes of the input tensor. The perm array must contain 0 to N-1 in the
+/// permuted order.
+/// @param flags - binary features of the Static Transpose Node. No supported flags are currently defined.
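+///
+/// For example (an illustrative sketch), an NHWC-to-NCHW layout change of a 4D tensor:
+/// @code
+///   const size_t perm[4] = {0, 3, 1, 2};  // [N, H, W, C] -> [N, C, H, W]
+///   enum xnn_status status = xnn_define_static_transpose(
+///       subgraph, /*num_dims=*/4, perm, input_id, output_id, /*flags=*/0);
+/// @endcode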
+enum xnn_status xnn_define_static_transpose( + xnn_subgraph_t subgraph, + size_t num_dims, + const size_t* perm, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Define a Tanh Node and add it to a Subgraph. +/// +/// @param subgraph - a Subgraph object that will own the created Node. +/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. +/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its +/// shape must match the shape of the input tensor. +/// @param flags - binary features of the Tanh Node. No supported flags are currently defined. +enum xnn_status xnn_define_tanh( + xnn_subgraph_t subgraph, + uint32_t input_id, + uint32_t output_id, + uint32_t flags); + +/// Code cache is a cache for JIT-generated code. +typedef struct xnn_code_cache* xnn_code_cache_t; + +/// Weights cache can be finalized in these ways: +enum xnn_weights_cache_finalization_kind { + /// Weights cache is finalized; no insert operations into the weights cache are allowed, even if the "inserted" + /// weights already exist in the cache. Weights cache memory will also be trimmed to a page boundary and set to + /// read-only (to prevent writes). + xnn_weights_cache_finalization_kind_hard, + /// Weights cache will be finalized with some extra space at the end; this allows for "inserting" into the cache only + /// if the weights are already in the cache, and errors on inserting uncached weights. There is memory overhead. + xnn_weights_cache_finalization_kind_soft, +}; + +/// A combination of multiple factors that uniquely locates an entry in the weights cache. +struct xnn_weights_cache_look_up_key { + /// The unique seed for each ukernel. It is guaranteed that each ukernel provides + /// a consistent and identical seed. + uint32_t seed; + /// Pointer to the original kernel. + const void* kernel; + /// Pointer to the original bias, which may be NULL. + const void* bias; +}; + +/// A group of function pointers to manage a weights cache. All functions may be +/// called from multiple threads. +struct xnn_weights_cache_provider { + /// User-specified pointer that will be passed as-is to all functions in this + /// structure. + void* context; + + /// Looks up the tuple of {cache_key, kernel, bias} in the cache. If it is found, + /// returns the offset to the found entry for reuse. Otherwise, returns SIZE_MAX. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + /// @param cache_key - The key used to locate the weights cache entry. + size_t (*look_up)(void* context, const struct xnn_weights_cache_look_up_key* cache_key); + + /// Ensures that the cache has enough space for `n` bytes. Returns the address at which to + /// store the packed weights. Returns NULL if it fails to reserve space. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + /// @param n - size to be reserved. + void* (*reserve_space)(void* context, size_t n); + + /// Looks up the packed weights at `ptr` in the cache. If they are found, they are reused. + /// Otherwise, they are added to the cache. Returns the offset into the cache. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + /// @param cache_key - The key used to locate the weights cache entry. + /// @param ptr - pointer to the packed weights. + /// @param size - size of the packed weights.
+ size_t (*look_up_or_insert)(void* context, const struct xnn_weights_cache_look_up_key* cache_key, void* ptr, size_t size); + + /// Returns whether the cache is finalized. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + bool (*is_finalized)(void* context); + + /// Returns the absolute pointer corresponding to `offset`, where the offset is returned from + /// `look_up` or `look_up_or_insert`. This function must be called after the cache is finalized. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + /// @param offset - offset from the start of the internal buffer. + void* (*offset_to_addr)(void* context, size_t offset); + + /// Destroy a weights cache object, as well as memory used for the cache. + /// @param context - The user-specified pointer from xnn_weights_cache_provider structure. + enum xnn_status (*delete_cache)(void* context); +}; + +/// Weights cache is a cache for packed weights. It can be reused between runtimes. +typedef struct xnn_weights_cache_provider* xnn_weights_cache_t; + +/// Create a weights cache object specifying the initial size of the weights cache (in bytes). +/// +/// @param[in] size - initial capacity of the weights cache (in bytes), i.e. it can hold size bytes without growing. +/// @param weights_cache_out - pointer to the variable that will be initialized to a handle to the weights cache provider +/// upon successful return. Once created, the weights cache provider can be shared between +/// different Runtime objects. +enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out); + +enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out); + +/// Finalizes the weights cache. The kind of finalization is specified by `finalization_kind`. +/// @param weights_cache - the weights cache object to finalize. +/// @param finalization_kind - the kind of finalization. +enum xnn_status xnn_finalize_weights_cache( + xnn_weights_cache_t weights_cache, + enum xnn_weights_cache_finalization_kind finalization_kind); + +/// Destroy a weights cache object, as well as memory used for the cache. +/// @param weights_cache - the weights cache object to destroy. +enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache); + +typedef struct xnn_workspace* xnn_workspace_t; + +/// Create a workspace object. +/// @param workspace_out - pointer to the variable that will be initialized to a handle to the workspace object upon +/// successful return. Once created, the workspace can be shared between different Runtime +/// objects. +enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out); +/// Destroy a workspace object, as well as memory used by the workspace. Object destruction can be deferred until all +/// Runtime objects created with this workspace are destroyed. +/// @param workspace - the workspace object to destroy. +enum xnn_status xnn_release_workspace(xnn_workspace_t workspace); + +/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. +typedef struct xnn_runtime* xnn_runtime_t; + +enum xnn_profile_info { + /// Returns a size_t containing the number of operators. + xnn_profile_info_num_operators, + /// Returns a char[] containing the null-character-separated names of all operators. + xnn_profile_info_operator_name, + /// Returns a uint64_t[] with the runtimes of all operators in the same order as xnn_profile_info_operator_name.
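+///
+/// All three properties are typically retrieved with a two-call pattern (an illustrative sketch, not part of
+/// the original documentation, using xnn_get_runtime_profiling_info declared below): a first call with a
+/// zero-sized buffer reports the required size via param_value_size_ret and returns xnn_status_out_of_memory,
+/// and a second call fetches the data.
+///
+/// @code
+///   size_t required = 0;
+///   enum xnn_status status = xnn_get_runtime_profiling_info(
+///       runtime, xnn_profile_info_operator_timing, 0, NULL, &required);
+///   if (status == xnn_status_out_of_memory) {
+///     uint64_t* timings = malloc(required);  // illustrative buffer management
+///     status = xnn_get_runtime_profiling_info(
+///         runtime, xnn_profile_info_operator_timing, required, timings, &required);
+///   }
+/// @endcode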
+ xnn_profile_info_operator_timing, +}; + +/// Return profile information for all operators. +/// +/// @param runtime - a Runtime object created with @ref xnn_create_runtime, @ref xnn_create_runtime_v2 or +/// @ref xnn_create_runtime_v3. +/// @param param_name - type of profile information required. +/// @param param_value_size - the size in bytes of memory pointed to by param_value. If this is not sufficient then +/// param_value_size_ret will be set to the required size and xnn_status_out_of_memory will be +/// returned. +/// @param param_value - a pointer to the memory location where appropriate values for a given param_name will be written. +/// @param param_value_size_ret - returns the number of bytes required to write the result if param_value_size is not +/// sufficient. +enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime, + enum xnn_profile_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +/// Create a Runtime object from a subgraph. +/// +/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or +/// Nodes can be added to the runtime once it is constructed. +/// @param weights_cache - a cache for packed weights. The runtime will look up and reuse packed weights in this cache; +/// this reduces the memory allocated for packed weights. +/// @param workspace - a workspace to hold internal tensors. The runtime will allocate space used for internal tensors +/// and track them using the workspace. The workspace can be shared and reused across different runtimes. If +/// the workspace is NULL, there will be no sharing: each runtime has its own workspace. +/// @param threadpool - the thread pool to be used for parallelization of computations in the runtime. If the thread +/// pool is NULL, the computation would run on the caller thread without parallelization. +/// @param flags - binary features of the runtime. The only currently supported values are +/// XNN_FLAG_HINT_SPARSE_INFERENCE, XNN_FLAG_HINT_FP16_INFERENCE, XNN_FLAG_FORCE_FP16_INFERENCE, +/// XNN_FLAG_YIELD_WORKERS, and XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER. If XNN_FLAG_YIELD_WORKERS is +/// specified, worker threads would be yielded to the system scheduler after processing the last operator +/// in the Runtime. If XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER is specified, convolution operators will +/// initialize indirection buffers on each inference run using temporary memory in the workspace, instead +/// of initializing persistent indirection buffers once. +/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon +/// successful return. Once constructed, the Runtime object is independent of the Subgraph object +/// used to create it. +enum xnn_status xnn_create_runtime_v4( + xnn_subgraph_t subgraph, + xnn_weights_cache_t weights_cache, + xnn_workspace_t workspace, + pthreadpool_t threadpool, + uint32_t flags, + xnn_runtime_t* runtime_out); + +enum xnn_status xnn_create_runtime_v3( + xnn_subgraph_t subgraph, + xnn_weights_cache_t weights_cache, + pthreadpool_t threadpool, + uint32_t flags, + xnn_runtime_t* runtime_out); + +enum xnn_status xnn_create_runtime_v2( + xnn_subgraph_t subgraph, + pthreadpool_t threadpool, + uint32_t flags, + xnn_runtime_t* runtime_out); + +enum xnn_status xnn_create_runtime( + xnn_subgraph_t subgraph, + xnn_runtime_t* runtime_out); + +struct xnn_external_value { + uint32_t id; + void* data; +}; + +/// Reshape an external value.
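+///
+/// Example (an illustrative sketch, not part of the original documentation): declaring a new shape for an
+/// external input before calling xnn_reshape_runtime. `runtime` and `input_id` are assumed to come from the
+/// application.
+///
+/// @code
+///   const size_t new_dims[4] = {1, 224, 224, 3};
+///   const enum xnn_status status = xnn_reshape_external_value(runtime, input_id, 4, new_dims);
+/// @endcode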
+/// +/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at +/// Subgraph creation. The external ID cannot be XNN_INVALID_VALUE_ID. +/// @param num_dims - number of dimensions in the shape. +/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL. +/// XNNPACK does not keep any pointers to this array after the function returns. +enum xnn_status xnn_reshape_external_value( + xnn_runtime_t runtime, + uint32_t external_id, + size_t num_dims, + const size_t* dims); + +/// Get the external value shape. +/// +/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified at +/// Subgraph creation. The external ID cannot be XNN_INVALID_VALUE_ID. +/// @param num_dims - a valid pointer into which the number of dimensions in the shape will be written. It cannot be +/// larger than XNN_MAX_TENSOR_DIMS. +/// @param dims - pointer to an array of @a num_dims shape dimensions. This pointer cannot be NULL. It must be large enough to hold +/// at least @a num_dims elements. XNNPACK does not keep any pointers to this array after the function returns. +enum xnn_status xnn_get_external_value_shape( + xnn_runtime_t runtime, + uint32_t external_id, + size_t* num_dims, + size_t* dims); + +/// Reshape the XNNPACK runtime. +/// +/// Propagates the shapes of input tensors through the graph to determine the shapes of intermediate and output tensors. +/// Memory is allocated if required. Output tensor shapes are returned by xnn_get_external_value_shape. +/// +/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. +enum xnn_status xnn_reshape_runtime( + xnn_runtime_t runtime); + +/// Deprecated. Use xnn_reshape_runtime and xnn_setup_runtime_v2. +/// +/// Set up data pointers for external inputs and outputs in a Runtime object and +/// allocate memory. +/// +/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. +/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must +/// match the number of external inputs and outputs in the runtime, i.e. all external +/// inputs and outputs in the runtime must be specified in one call. +/// @param external_values - array with location information for all external inputs and outputs in the runtime. +enum xnn_status xnn_setup_runtime( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values); + +/// Set up data pointers for external inputs and outputs in a Runtime object. +/// Should be called after xnn_reshape_runtime. +/// +/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. +/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must +/// match the number of external inputs and outputs in the runtime, i.e. all external +/// inputs and outputs in the runtime must be specified in one call. +/// @param external_values - array with location information for all external inputs and outputs in the runtime. +enum xnn_status xnn_setup_runtime_v2( + xnn_runtime_t runtime, + size_t num_external_values, + const struct xnn_external_value* external_values); + +/// Execute forward pass for all operators in the runtime.
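+///
+/// A typical inference step chains the calls above (an illustrative sketch, not part of the original
+/// documentation; `input_id`, `output_id`, and the I/O buffers are assumed to come from the application):
+///
+/// @code
+///   struct xnn_external_value io[2] = {{input_id, input_buffer}, {output_id, output_buffer}};
+///   xnn_reshape_runtime(runtime);
+///   xnn_setup_runtime_v2(runtime, 2, io);
+///   xnn_invoke_runtime(runtime);
+/// @endcode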
+/// +/// @param runtime - the Runtime object with the execution plan to invoke. +enum xnn_status xnn_invoke_runtime( + xnn_runtime_t runtime); + +/// Destroy a Runtime object, as well as operators and memory associated with it. +/// +/// @param runtime - the Runtime object to destroy. +enum xnn_status xnn_delete_runtime( + xnn_runtime_t runtime); + +typedef struct xnn_operator* xnn_operator_t; + +enum xnn_status xnn_run_operator( + xnn_operator_t op, + pthreadpool_t threadpool); + +enum xnn_status xnn_delete_operator( + xnn_operator_t op); + + +/// Operator API: +/// - create operator will create and populate an xnn_operator_t +/// - reshape operator will update fields in xnn_operator_t with shape/dimensions and parallelization information +/// - setup operator will update pointers to inputs and outputs +/// Each supported operator must have a create, reshape, and setup function. (Optionally a run function.) +/// Operators listed below are in alphabetical order by operator name; within each operator, we sort alphabetically by +/// data layout and type. We also group create, reshape, setup (and optionally run) functions of each operator together. +/// A sketch of this lifecycle is shown with the ELU operator further below. + +enum xnn_status xnn_create_abs_nc_f16( + uint32_t flags, + xnn_operator_t* abs_op_out); + +enum xnn_status xnn_reshape_abs_nc_f16( + xnn_operator_t abs_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_abs_nc_f16( + xnn_operator_t abs_op, + const void* input, + void* output); + +enum xnn_status xnn_create_abs_nc_f32( + uint32_t flags, + xnn_operator_t* abs_op_out); + +enum xnn_status xnn_reshape_abs_nc_f32( + xnn_operator_t abs_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_abs_nc_f32( + xnn_operator_t abs_op, + const float* input, + float* output); + +enum xnn_status xnn_run_abs_nc_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + float* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_add_nd_f16( + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* add_op_out); + +enum xnn_status xnn_reshape_add_nd_f16( + xnn_operator_t add_op, + size_t num_input1_dims, + const size_t* input1_shape, + size_t num_input2_dims, + const size_t* input2_shape, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_add_nd_f16( + xnn_operator_t add_op, + const void* input1, + const void* input2, + void* output); + +enum xnn_status xnn_create_add_nd_f32( + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* add_op_out); + +enum xnn_status xnn_reshape_add_nd_f32( + xnn_operator_t add_op, + size_t num_input1_dims, + const size_t* input1_shape, + size_t num_input2_dims, + const size_t* input2_shape, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_add_nd_f32( + xnn_operator_t add_op, + const float* input1, + const float* input2, + float* output); + +enum xnn_status xnn_run_add_nd_f32( + size_t num_input1_dims, + const size_t* input1_shape, + size_t num_input2_dims, + const size_t* input2_shape, + const float* input1, + const float* input2, + float* output, + float output_min, + float output_max, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_add_nd_qs8( + int8_t input1_zero_point, + float input1_scale, + int8_t input2_zero_point, + float input2_scale, + int8_t output_zero_point, + float
output_scale, + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_operator_t* add_op_out); + +enum xnn_status xnn_reshape_add_nd_qs8( + xnn_operator_t add_op, + size_t num_input1_dims, + const size_t* input1_shape, + size_t num_input2_dims, + const size_t* input2_shape, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_add_nd_qs8( + xnn_operator_t add_op, + const int8_t* input1, + const int8_t* input2, + int8_t* output); + +enum xnn_status xnn_run_add_nd_qs8( + size_t num_input1_dims, + const size_t* input1_shape, + int8_t input1_zero_point, + float input1_scale, + size_t num_input2_dims, + const size_t* input2_shape, + int8_t input2_zero_point, + float input2_scale, + const int8_t* input1, + const int8_t* input2, + int8_t* output, + int8_t output_zero_point, + float output_scale, + int8_t output_min, + int8_t output_max, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_add_nd_qu8( + uint8_t input1_zero_point, + float input1_scale, + uint8_t input2_zero_point, + float input2_scale, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + xnn_operator_t* add_op_out); + +enum xnn_status xnn_reshape_add_nd_qu8( + xnn_operator_t add_op, + size_t num_input1_dims, + const size_t* input1_shape, + size_t num_input2_dims, + const size_t* input2_shape, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_add_nd_qu8( + xnn_operator_t add_op, + const uint8_t* input1, + const uint8_t* input2, + uint8_t* output); + +enum xnn_status xnn_run_add_nd_qu8( + size_t num_input1_dims, + const size_t* input1_shape, + uint8_t input1_zero_point, + float input1_scale, + size_t num_input2_dims, + const size_t* input2_shape, + uint8_t input2_zero_point, + float input2_scale, + const uint8_t* input1, + const uint8_t* input2, + uint8_t* output, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t flags, + xnn_operator_t* argmax_pooling_op_out); + +enum xnn_status xnn_reshape_argmax_pooling2d_nhwc_f32( + xnn_operator_t argmax_pooling_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32( + xnn_operator_t argmax_pooling_op, + void* workspace, + const float* input, + float* output, + uint32_t* index); + +enum xnn_status xnn_create_average_pooling2d_nhwc_f16( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t stride_height, + uint32_t stride_width, + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* average_pooling_op_out); + +enum xnn_status xnn_reshape_average_pooling2d_nhwc_f16( + xnn_operator_t average_pooling_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* 
output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_average_pooling2d_nhwc_f16( + xnn_operator_t average_pooling_op, + void* workspace, + const void* input, + void* output); + +enum xnn_status xnn_create_average_pooling2d_nhwc_f32( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t stride_height, + uint32_t stride_width, + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* average_pooling_op_out); + +enum xnn_status xnn_reshape_average_pooling2d_nhwc_f32( + xnn_operator_t average_pooling_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_average_pooling2d_nhwc_f32( + xnn_operator_t average_pooling_op, + void* workspace, + const float* input, + float* output); + +enum xnn_status xnn_create_average_pooling2d_nhwc_qu8( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t pooling_height, + uint32_t pooling_width, + uint32_t stride_height, + uint32_t stride_width, + uint8_t input_zero_point, + float input_scale, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + xnn_operator_t* average_pooling_op_out); + +enum xnn_status xnn_reshape_average_pooling2d_nhwc_qu8( + xnn_operator_t average_pooling_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8( + xnn_operator_t average_pooling_op, + void* workspace, + const uint8_t* input, + uint8_t* output); + +enum xnn_status xnn_create_bankers_rounding_nc_f16( + uint32_t flags, + xnn_operator_t* rounding_op_out); + +enum xnn_status xnn_reshape_bankers_rounding_nc_f16( + xnn_operator_t rounding_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_bankers_rounding_nc_f16( + xnn_operator_t rounding_op, + const void* input, + void* output); + +enum xnn_status xnn_create_bankers_rounding_nc_f32( + uint32_t flags, + xnn_operator_t* rounding_op_out); + +enum xnn_status xnn_reshape_bankers_rounding_nc_f32( + xnn_operator_t rounding_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_bankers_rounding_nc_f32( + xnn_operator_t rounding_op, + const float* input, + float* output); + +enum xnn_status xnn_run_bankers_rounding_nc_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + float* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_batch_matrix_multiply_nc_f16( + uint32_t flags, + xnn_operator_t* batch_matrix_multiply_op); + +enum xnn_status xnn_reshape_batch_matrix_multiply_nc_f16( + xnn_operator_t batch_matrix_multiply_op, + size_t batch_size, + size_t m, + size_t k, + 
size_t n, + size_t* workspace_size, + size_t* workspace_alignment, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_batch_matrix_multiply_nc_f16( + xnn_operator_t batch_matrix_multiply_op, + void* workspace, + const void* lhs_input, + const void* rhs_input, + void* output); + +enum xnn_status xnn_create_batch_matrix_multiply_nc_f32( + uint32_t flags, + xnn_operator_t* batch_matrix_multiply_op); + +enum xnn_status xnn_reshape_batch_matrix_multiply_nc_f32( + xnn_operator_t batch_matrix_multiply_op, + size_t batch_size, + size_t m, + size_t k, + size_t n, + size_t* workspace_size, + size_t* workspace_alignment, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_batch_matrix_multiply_nc_f32( + xnn_operator_t batch_matrix_multiply_op, + void* workspace, + const float* lhs_input, + const float* rhs_input, + float* output); + +enum xnn_status xnn_create_ceiling_nc_f16( + uint32_t flags, + xnn_operator_t* ceiling_op_out); + +enum xnn_status xnn_reshape_ceiling_nc_f16( + xnn_operator_t ceiling_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_ceiling_nc_f16( + xnn_operator_t ceiling_op, + const void* input, + void* output); + +enum xnn_status xnn_create_ceiling_nc_f32( + uint32_t flags, + xnn_operator_t* ceiling_op_out); + +enum xnn_status xnn_run_ceiling_nc_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + float* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_reshape_ceiling_nc_f32( + xnn_operator_t ceiling_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_ceiling_nc_f32( + xnn_operator_t ceiling_op, + const float* input, + float* output); + +enum xnn_status xnn_create_channel_shuffle_nc_x8( + size_t groups, + size_t group_channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* channel_shuffle_op_out); + +enum xnn_status xnn_reshape_channel_shuffle_nc_x8( + xnn_operator_t channel_shuffle_op, + size_t batch_size, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_channel_shuffle_nc_x8( + xnn_operator_t channel_shuffle_op, + const void* input, + void* output); + +enum xnn_status xnn_create_channel_shuffle_nc_x32( + size_t groups, + size_t group_channels, + size_t input_stride, + size_t output_stride, + uint32_t flags, + xnn_operator_t* channel_shuffle_op_out); + +enum xnn_status xnn_reshape_channel_shuffle_nc_x32( + xnn_operator_t channel_shuffle_op, + size_t batch_size, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_channel_shuffle_nc_x32( + xnn_operator_t channel_shuffle_op, + const void* input, + void* output); + +enum xnn_status xnn_create_clamp_nc_f16( + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* clamp_op_out); + +enum xnn_status xnn_reshape_clamp_nc_f16( + xnn_operator_t clamp_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_clamp_nc_f16( + xnn_operator_t clamp_op, + const void* input, + void* output); + +enum xnn_status xnn_create_clamp_nc_f32( + float output_min, + float output_max, + uint32_t flags, + xnn_operator_t* clamp_op_out); + +enum xnn_status xnn_reshape_clamp_nc_f32( + xnn_operator_t clamp_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t 
threadpool); + +enum xnn_status xnn_setup_clamp_nc_f32( + xnn_operator_t clamp_op, + const float* input, + float* output); + +enum xnn_status xnn_run_clamp_nc_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + float* output, + float output_min, + float output_max, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_clamp_nc_s8( + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_operator_t* clamp_op_out); + +enum xnn_status xnn_reshape_clamp_nc_s8( + xnn_operator_t clamp_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_clamp_nc_s8( + xnn_operator_t clamp_op, + const int8_t* input, + int8_t* output); + +enum xnn_status xnn_create_clamp_nc_u8( + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + xnn_operator_t* clamp_op_out); + +enum xnn_status xnn_reshape_clamp_nc_u8( + xnn_operator_t clamp_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_clamp_nc_u8( + xnn_operator_t clamp_op, + const uint8_t* input, + uint8_t* output); + +enum xnn_status xnn_create_constant_pad_nd_x8( + const void* padding_value, + uint32_t flags, + xnn_operator_t* constant_pad_op_out); + +enum xnn_status xnn_reshape_constant_pad_nd_x8( + xnn_operator_t constant_pad_op, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_padding, + const size_t* post_padding, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_constant_pad_nd_x8( + xnn_operator_t constant_pad_op, + const void* input, + void* output); + +enum xnn_status xnn_run_constant_pad_nd_x8( + uint32_t flags, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_paddings, + const size_t* post_paddings, + const void* input, + void* output, + const void* padding_value, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_constant_pad_nd_x16( + const void* padding_value, + uint32_t flags, + xnn_operator_t* constant_pad_op_out); + +enum xnn_status xnn_reshape_constant_pad_nd_x16( + xnn_operator_t constant_pad_op, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_padding, + const size_t* post_padding, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_constant_pad_nd_x16( + xnn_operator_t constant_pad_op, + const void* input, + void* output); + +enum xnn_status xnn_run_constant_pad_nd_x16( + uint32_t flags, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_paddings, + const size_t* post_paddings, + const void* input, + void* output, + const void* padding_value, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_constant_pad_nd_x32( + const void* padding_value, + uint32_t flags, + xnn_operator_t* constant_pad_op_out); + +enum xnn_status xnn_reshape_constant_pad_nd_x32( + xnn_operator_t constant_pad_op, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_padding, + const size_t* post_padding, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_constant_pad_nd_x32( + xnn_operator_t constant_pad_op, + const void* input, + void* output); + +enum xnn_status xnn_run_constant_pad_nd_x32( + uint32_t flags, + size_t num_dims, + const size_t* input_shape, + const size_t* pre_paddings, + const size_t* post_paddings, + const void* input, + void* output, + const void* padding_value, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_f16_f32( + 
uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f16_f32( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_f16_f32( + xnn_operator_t convert_op, + const void* input, + float* output); + +enum xnn_status xnn_run_convert_nc_f16_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const void* input, + float* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_f16_qd8( + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f16_qd8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +// quantization_params must be padded with at least XNN_EXTRA_QUANTIZATION_PARAMS entries. +enum xnn_status xnn_setup_convert_nc_f16_qd8( + xnn_operator_t convert_op, + const void* input, + int8_t* output, + struct xnn_dynamic_quantization_params* quantization_params); + +enum xnn_status xnn_create_convert_nc_f32_qd8( + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f32_qd8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +// quantization_params must be padded with at least XNN_EXTRA_QUANTIZATION_PARAMS entries. +enum xnn_status xnn_setup_convert_nc_f32_qd8( + xnn_operator_t convert_op, + const float* input, + int8_t* output, + struct xnn_dynamic_quantization_params* quantization_params); + +enum xnn_status xnn_create_convert_nc_f32_f16( + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f32_f16( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_f32_f16( + xnn_operator_t convert_op, + const float* input, + void* output); + +enum xnn_status xnn_run_convert_nc_f32_f16( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + void* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_f32_qs8( + float output_scale, + int8_t output_zero_point, + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f32_qs8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_f32_qs8( + xnn_operator_t convert_op, + const float* input, + int8_t* output); + +enum xnn_status xnn_run_convert_nc_f32_qs8( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + int8_t* output, + float output_scale, + int8_t output_zero_point, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_f32_qu8( + float output_scale, + uint8_t output_zero_point, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_f32_qu8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum 
xnn_status xnn_setup_convert_nc_f32_qu8( + xnn_operator_t convert_op, + const float* input, + uint8_t* output); + +enum xnn_status xnn_run_convert_nc_f32_qu8( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const float* input, + uint8_t* output, + float output_scale, + uint8_t output_zero_point, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_qs8( + float input_scale, + int8_t input_zero_point, + float output_scale, + int8_t output_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_qs8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qs8( + xnn_operator_t convert_op, + const int8_t* input, + int8_t* output); + +enum xnn_status xnn_create_convert_nc_qs8_f16( + float input_scale, + int8_t input_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_qs8_f16( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qs8_f16( + xnn_operator_t convert_op, + const int8_t* input, + void* output); + +enum xnn_status xnn_create_convert_nc_qs8_f32( + float input_scale, + int8_t input_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_qs8_f32( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qs8_f32( + xnn_operator_t convert_op, + const int8_t* input, + float* output); + +enum xnn_status xnn_run_convert_nc_qs8_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const int8_t* input, + float* output, + float input_scale, + int8_t input_zero_point, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_qs16_qs8( + float input_scale, + float output_scale, + int8_t output_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_qs16_qs8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qs16_qs8( + xnn_operator_t convert_op, + const int16_t* input, + int8_t* output); + +enum xnn_status xnn_run_convert_nc_qs16_qs8( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const int16_t* input, + int8_t* output, + float input_scale, + float output_scale, + int8_t output_zero_point, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convert_nc_qu8( + float input_scale, + uint8_t input_zero_point, + float output_scale, + uint8_t output_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_qu8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qu8( + xnn_operator_t convert_op, + const uint8_t* input, + uint8_t* output); + +enum xnn_status xnn_create_convert_nc_qu8_f32( + float input_scale, + uint8_t input_zero_point, + uint32_t flags, + xnn_operator_t* convert_op_out); + +enum 
xnn_status xnn_reshape_convert_nc_qu8_f32( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convert_nc_qu8_f32( + xnn_operator_t convert_op, + const uint8_t* input, + float* output); + +enum xnn_status xnn_run_convert_nc_qu8_f32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const uint8_t* input, + float* output, + float input_scale, + uint8_t input_zero_point, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_convolution2d_nchw_f16( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + const void* kernel, + const void* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nchw_f16( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nchw_f16( + xnn_operator_t convolution_op, + const void* input, + void* output); + +enum xnn_status xnn_create_convolution2d_nchw_f32( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + const float* kernel, + const float* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nchw_f32( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nchw_f32( + xnn_operator_t convolution_op, + const float* input, + float* output); + +enum xnn_status xnn_create_convolution2d_nhwc_f16( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + const void* kernel, + const void* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nhwc_f16( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t 
input_width, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nhwc_f16( + xnn_operator_t convolution_op, + void* workspace, + const void* input, + void* output); + +enum xnn_status xnn_create_convolution2d_nhwc_f32( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + const float* kernel, + const float* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +// Forward declare. +struct xnn_post_operation; + +/// Create a convolution operator with a number of post operations. The +/// convolution operator created using this function does not have output_min +/// and output_max. The list of operations in post_operations will be applied in +/// order. Convolution with post operations is only supported on JIT platforms +/// and when JIT is enabled. +enum xnn_status xnn_create_fused_convolution2d_nhwc_f32( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + const float* kernel, + const float* bias, + size_t num_post_operations, + struct xnn_post_operation* post_operations, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nhwc_f32( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nhwc_f32( + xnn_operator_t convolution_op, + void* workspace, + const float* input, + float* output); + +enum xnn_status xnn_create_convolution2d_nhwc_qd8_f16_qc8w( + uint32_t input_padding_top, uint32_t input_padding_right, + uint32_t input_padding_bottom, uint32_t input_padding_left, + uint32_t kernel_height, uint32_t kernel_width, uint32_t subsampling_height, + uint32_t subsampling_width, uint32_t dilation_height, + uint32_t dilation_width, uint32_t groups, size_t group_input_channels, + size_t group_output_channels, size_t input_channel_stride, + size_t output_channel_stride, const float* kernel_scale, + const int8_t* kernel, const float* bias, float output_min, float output_max, + uint32_t flags, xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_create_convolution2d_nhwc_qd8_f32_qc8w( + uint32_t input_padding_top, uint32_t input_padding_right, + uint32_t input_padding_bottom, uint32_t input_padding_left, + uint32_t kernel_height, uint32_t kernel_width, uint32_t subsampling_height,
+ uint32_t subsampling_width, uint32_t dilation_height, + uint32_t dilation_width, uint32_t groups, size_t group_input_channels, + size_t group_output_channels, size_t input_channel_stride, + size_t output_channel_stride, const float* kernel_scale, + const int8_t* kernel, const float* bias, float output_min, float output_max, + uint32_t flags, xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_create_convolution2d_nhwc_qs8( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + int8_t input_zero_point, + float input_scale, + float kernel_scale, + const int8_t* kernel, + const int32_t* bias, + int8_t output_zero_point, + float output_scale, + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nhwc_qd8_f16_qc8w( + xnn_operator_t convolution_op, size_t batch_size, size_t input_height, + size_t input_width, size_t* workspace_size, size_t* workspace_alignment, + size_t* output_height_out, size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_reshape_convolution2d_nhwc_qd8_f32_qc8w( + xnn_operator_t convolution_op, size_t batch_size, size_t input_height, + size_t input_width, size_t* workspace_size, size_t* workspace_alignment, + size_t* output_height_out, size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_reshape_convolution2d_nhwc_qs8( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nhwc_qd8_f16_qc8w( + xnn_operator_t convolution_op, void* workspace, const int8_t* input, + void* output, + const struct xnn_dynamic_quantization_params* quantization_params); + +enum xnn_status xnn_setup_convolution2d_nhwc_qd8_f32_qc8w( + xnn_operator_t convolution_op, void* workspace, const int8_t* input, + float* output, + const struct xnn_dynamic_quantization_params* quantization_params); + +enum xnn_status xnn_setup_convolution2d_nhwc_qs8( + xnn_operator_t convolution_op, + void* workspace, + const int8_t* input, + int8_t* output); + +enum xnn_status xnn_create_convolution2d_nhwc_qs8_qc8w( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + int8_t input_zero_point, + float input_scale, + const float* kernel_scale, + const int8_t* kernel, + const int32_t* bias, + int8_t output_zero_point, + float output_scale, + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + 
xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nhwc_qs8_qc8w( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nhwc_qs8_qc8w( + xnn_operator_t convolution_op, + void* workspace, + const int8_t* input, + int8_t* output); + +enum xnn_status xnn_create_convolution2d_nhwc_qu8( + uint32_t input_padding_top, + uint32_t input_padding_right, + uint32_t input_padding_bottom, + uint32_t input_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t subsampling_height, + uint32_t subsampling_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_channel_stride, + size_t output_channel_stride, + uint8_t input_zero_point, + float input_scale, + uint8_t kernel_zero_point, + float kernel_scale, + const uint8_t* kernel, + const int32_t* bias, + uint8_t output_zero_point, + float output_scale, + uint8_t output_min, + uint8_t output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* convolution_op_out); + +enum xnn_status xnn_reshape_convolution2d_nhwc_qu8( + xnn_operator_t convolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + size_t* workspace_size, + size_t* workspace_alignment, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_convolution2d_nhwc_qu8( + xnn_operator_t convolution_op, + void* workspace, + const uint8_t* input, + uint8_t* output); + +enum xnn_status xnn_create_copy_nc_x8( + uint32_t flags, + xnn_operator_t* copy_op_out); + +enum xnn_status xnn_reshape_copy_nc_x8( + xnn_operator_t copy_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_copy_nc_x8( + xnn_operator_t copy_op, + const void* input, + void* output); + +enum xnn_status xnn_create_copy_nc_x16( + uint32_t flags, + xnn_operator_t* copy_op_out); + +enum xnn_status xnn_reshape_copy_nc_x16( + xnn_operator_t copy_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_copy_nc_x16( + xnn_operator_t copy_op, + const void* input, + void* output); + +enum xnn_status xnn_create_copy_nc_x32( + uint32_t flags, + xnn_operator_t* copy_op_out); + +enum xnn_status xnn_reshape_copy_nc_x32( + xnn_operator_t copy_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_copy_nc_x32( + xnn_operator_t copy_op, + const void* input, + void* output); + +enum xnn_status xnn_run_copy_nc_x32( + size_t channels, + size_t input_stride, + size_t output_stride, + size_t batch_size, + const uint32_t* input, + uint32_t* output, + uint32_t flags, + pthreadpool_t threadpool); + +enum xnn_status xnn_create_deconvolution2d_nhwc_f16( + uint32_t output_padding_top, + uint32_t output_padding_right, + uint32_t output_padding_bottom, + uint32_t output_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t stride_height, + uint32_t stride_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t 
group_input_channels, + size_t group_output_channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + const void* kernel, + const void* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* deconvolution_op_out); + +enum xnn_status xnn_reshape_deconvolution2d_nhwc_f16( + xnn_operator_t deconvolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + uint32_t adjustment_height, + uint32_t adjustment_width, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_deconvolution2d_nhwc_f16( + xnn_operator_t deconvolution_op, + const void* input, + void* output); + +enum xnn_status xnn_create_deconvolution2d_nhwc_f32( + uint32_t output_padding_top, + uint32_t output_padding_right, + uint32_t output_padding_bottom, + uint32_t output_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t stride_height, + uint32_t stride_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + const float* kernel, + const float* bias, + float output_min, + float output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* deconvolution_op_out); + +enum xnn_status xnn_reshape_deconvolution2d_nhwc_f32( + xnn_operator_t deconvolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + uint32_t adjustment_height, + uint32_t adjustment_width, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_deconvolution2d_nhwc_f32( + xnn_operator_t deconvolution_op, + const float* input, + float* output); + +enum xnn_status xnn_create_deconvolution2d_nhwc_qs8( + uint32_t output_padding_top, + uint32_t output_padding_right, + uint32_t output_padding_bottom, + uint32_t output_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t stride_height, + uint32_t stride_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t group_output_channels, + size_t input_pixel_stride, + size_t output_pixel_stride, + int8_t input_zero_point, + float input_scale, + float kernel_scale, + const int8_t* kernel, + const int32_t* bias, + int8_t output_zero_point, + float output_scale, + int8_t output_min, + int8_t output_max, + uint32_t flags, + xnn_code_cache_t code_cache, + xnn_weights_cache_t weights_cache, + xnn_operator_t* deconvolution_op_out); + +enum xnn_status xnn_reshape_deconvolution2d_nhwc_qs8( + xnn_operator_t deconvolution_op, + size_t batch_size, + size_t input_height, + size_t input_width, + uint32_t adjustment_height, + uint32_t adjustment_width, + size_t* output_height_out, + size_t* output_width_out, + pthreadpool_t threadpool); + +enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8( + xnn_operator_t deconvolution_op, + const int8_t* input, + int8_t* output); + +enum xnn_status xnn_create_deconvolution2d_nhwc_qu8( + uint32_t output_padding_top, + uint32_t output_padding_right, + uint32_t output_padding_bottom, + uint32_t output_padding_left, + uint32_t kernel_height, + uint32_t kernel_width, + uint32_t stride_height, + uint32_t stride_width, + uint32_t dilation_height, + uint32_t dilation_width, + uint32_t groups, + size_t group_input_channels, + size_t 
+
+enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
+  uint32_t output_padding_top,
+  uint32_t output_padding_right,
+  uint32_t output_padding_bottom,
+  uint32_t output_padding_left,
+  uint32_t kernel_height,
+  uint32_t kernel_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  uint32_t groups,
+  size_t group_input_channels,
+  size_t group_output_channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t kernel_zero_point,
+  float kernel_scale,
+  const uint8_t* kernel,
+  const int32_t* bias,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* deconvolution_op_out);
+
+enum xnn_status xnn_reshape_deconvolution2d_nhwc_qu8(
+  xnn_operator_t deconvolution_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  uint32_t adjustment_height,
+  uint32_t adjustment_width,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
+  xnn_operator_t deconvolution_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x16(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* depth_to_space_op_out);
+
+enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x16(
+  xnn_operator_t depth_to_space_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x16(
+  xnn_operator_t depth_to_space_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* depth_to_space_op_out);
+
+enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x32(
+  xnn_operator_t depth_to_space_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
+  xnn_operator_t depth_to_space_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_depth_to_space_nhwc_x8(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* depth_to_space_op_out);
+
+enum xnn_status xnn_reshape_depth_to_space_nhwc_x8(
+  xnn_operator_t depth_to_space_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
+  xnn_operator_t depth_to_space_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_depth_to_space_nhwc_x16(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* depth_to_space_op_out);
+
+enum xnn_status xnn_reshape_depth_to_space_nhwc_x16(
+  xnn_operator_t depth_to_space_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
+  xnn_operator_t depth_to_space_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_depth_to_space_nhwc_x32(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* depth_to_space_op_out);
+
+enum xnn_status xnn_reshape_depth_to_space_nhwc_x32(
+  xnn_operator_t depth_to_space_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
+  xnn_operator_t depth_to_space_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_divide_nd_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* divide_op_out);
+
+enum xnn_status xnn_reshape_divide_nd_f16(
+  xnn_operator_t divide_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_divide_nd_f16(
+  xnn_operator_t divide_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_divide_nd_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* divide_op_out);
+
+enum xnn_status xnn_reshape_divide_nd_f32(
+  xnn_operator_t divide_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_divide_nd_f32(
+  xnn_operator_t divide_op,
+  const float* input1,
+  const float* input2,
+  float* output);
+
+enum xnn_status xnn_run_divide_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
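Besides the create/reshape/setup triple, some operators expose a one-shot xnn_run_* entry point that takes shapes, buffers, and parameters in a single call. A short sketch of the f32 divide path declared above, with illustrative shapes chosen to exercise the broadcast between a 2x3 tensor and a 1x3 tensor (the broadcast semantics are my assumption from the _nd naming, not stated in this excerpt):

#include <math.h>
#include <stddef.h>
#include <xnnpack.h>

int divide_example(void) {
  if (xnn_initialize(/*allocator=*/NULL) != xnn_status_success) return 1;
  const size_t shape_a[] = {2, 3};
  const size_t shape_b[] = {1, 3};
  const float a[6] = {2, 4, 6, 8, 10, 12};
  const float b[3] = {2, 2, 2};
  float out[6];
  enum xnn_status status = xnn_run_divide_nd_f32(
      2, shape_a, 2, shape_b, a, b, out,
      /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
      /*flags=*/0, /*threadpool=*/NULL);
  return status == xnn_status_success ? 0 : 1;
}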
+
+enum xnn_status xnn_create_dynamic_fully_connected_nc_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* dynamic_fully_connected_op_out);
+
+enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f16(
+  xnn_operator_t dynamic_fully_connected_op,
+  size_t batch_size,
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_dynamic_fully_connected_nc_f16(
+  xnn_operator_t dynamic_fully_connected_op,
+  void* workspace,
+  const void* input,
+  const void* kernel,
+  const void* bias,
+  void* output);
+
+enum xnn_status xnn_create_dynamic_fully_connected_nc_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* dynamic_fully_connected_op_out);
+
+enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f32(
+  xnn_operator_t dynamic_fully_connected_op,
+  size_t batch_size,
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_dynamic_fully_connected_nc_f32(
+  xnn_operator_t dynamic_fully_connected_op,
+  void* workspace,
+  const float* input,
+  const float* kernel,
+  const float* bias,
+  float* output);
+
+enum xnn_status xnn_create_elu_nc_f16(
+  float alpha,
+  uint32_t flags,
+  xnn_operator_t* elu_op_out);
+
+enum xnn_status xnn_reshape_elu_nc_f16(
+  xnn_operator_t elu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_elu_nc_f16(
+  xnn_operator_t elu_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_elu_nc_f32(
+  float alpha,
+  uint32_t flags,
+  xnn_operator_t* elu_op_out);
+
+enum xnn_status xnn_reshape_elu_nc_f32(
+  xnn_operator_t elu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_elu_nc_f32(
+  xnn_operator_t elu_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_elu_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  float alpha,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_elu_nc_qs8(
+  float alpha,
+  int8_t input_zero_point,
+  float input_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* elu_op_out);
+
+enum xnn_status xnn_reshape_elu_nc_qs8(
+  xnn_operator_t elu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_elu_nc_qs8(
+  xnn_operator_t elu_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_floor_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* floor_op_out);
+
+enum xnn_status xnn_reshape_floor_nc_f16(
+  xnn_operator_t floor_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_floor_nc_f16(
+  xnn_operator_t floor_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_floor_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* floor_op_out);
+
+enum xnn_status xnn_reshape_floor_nc_f32(
+  xnn_operator_t floor_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_floor_nc_f32(
+  xnn_operator_t floor_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_floor_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_fully_connected_nc_f16(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  const void* kernel,
+  const void* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_f16(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_f16(
+  xnn_operator_t fully_connected_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_fully_connected_nc_f32(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  const float* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_f32(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_f32(
+  xnn_operator_t fully_connected_op,
+  const float* input,
+  float* output);
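The dynamic fully connected variant above differs from the ordinary one in two ways: the kernel and bias arrive at setup time rather than at create time, and reshape reports a scratch workspace that the caller must allocate. A hedged sketch under those assumptions, with illustrative sizes; aligned_alloc is C11 and requires the size to be a multiple of the alignment, hence the round-up:

#include <math.h>
#include <stdlib.h>
#include <xnnpack.h>

int dynamic_fc_example(const float* input, const float* kernel,
                       const float* bias, float* output) {
  if (xnn_initialize(NULL) != xnn_status_success) return 1;
  xnn_operator_t fc = NULL;
  if (xnn_create_dynamic_fully_connected_nc_f32(
          -INFINITY, INFINITY, /*flags=*/0, &fc) != xnn_status_success) return 1;

  size_t workspace_size = 0, workspace_alignment = 0;
  if (xnn_reshape_dynamic_fully_connected_nc_f32(
          fc, /*batch_size=*/4, /*input_channels=*/8, /*output_channels=*/16,
          /*input_stride=*/8, /*output_stride=*/16,
          &workspace_size, &workspace_alignment,
          /*threadpool=*/NULL) != xnn_status_success) return 1;

  // Round the workspace size up to a multiple of the reported alignment.
  size_t rounded = (workspace_size + workspace_alignment - 1)
                   / workspace_alignment * workspace_alignment;
  void* workspace = rounded ? aligned_alloc(workspace_alignment, rounded) : NULL;

  // Weights are bound here, at setup, rather than baked in at create.
  if (xnn_setup_dynamic_fully_connected_nc_f32(
          fc, workspace, input, kernel, bias, output) != xnn_status_success) return 1;
  enum xnn_status status = xnn_run_operator(fc, NULL);
  xnn_delete_operator(fc);
  free(workspace);
  return status == xnn_status_success ? 0 : 1;
}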
+
+enum xnn_status xnn_create_fully_connected_nc_f32_qc4w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  uint8_t kernel_zero_point,
+  const float* kernel_scale,
+  const uint8_t* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_f32_qc4w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_f32_qc4w(
+  xnn_operator_t fully_connected_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_fully_connected_nc_f32_qc8w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  const float* kernel_scale,
+  const int8_t* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_f32_qc8w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_f32_qc8w(
+  xnn_operator_t fully_connected_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc4w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  uint8_t kernel_zero_point,
+  const float* kernel_scale,
+  const void* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc4w(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  void* output,
+  const struct xnn_dynamic_quantization_params* quantization_params);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc4w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc4w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  uint8_t kernel_zero_point,
+  const float* kernel_scale,
+  const void* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc4w(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  float* output,
+  const struct xnn_dynamic_quantization_params* quantization_params);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc4w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc8w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  const float* kernel_scale,
+  const int8_t* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc8w(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  void* output,
+  const struct xnn_dynamic_quantization_params* quantization_params);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc8w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc8w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  const float* kernel_scale,
+  const int8_t* kernel,
+  const float* bias,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc8w(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  float* output,
+  const struct xnn_dynamic_quantization_params* quantization_params);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc8w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_fully_connected_nc_qs8(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  int8_t input_zero_point,
+  float input_scale,
+  float kernel_scale,
+  const int8_t* kernel,
+  const int32_t* bias,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qs8(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_qs8(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_fully_connected_nc_qs8_qc8w(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  int8_t input_zero_point,
+  float input_scale,
+  const float* kernel_scale,
+  const int8_t* kernel,
+  const int32_t* bias,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qs8_qc8w(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_qs8_qc8w(
+  xnn_operator_t fully_connected_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_fully_connected_nc_qu8(
+  size_t input_channels,
+  size_t output_channels,
+  size_t input_stride,
+  size_t output_stride,
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t kernel_zero_point,
+  float kernel_scale,
+  const uint8_t* kernel,
+  const int32_t* bias,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* fully_connected_op_out);
+
+enum xnn_status xnn_reshape_fully_connected_nc_qu8(
+  xnn_operator_t fully_connected_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_fully_connected_nc_qu8(
+  xnn_operator_t fully_connected_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_global_average_pooling_ncw_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_ncw_f16(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_ncw_f16(
+  xnn_operator_t global_average_pooling_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_global_average_pooling_ncw_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_ncw_f32(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
+  xnn_operator_t global_average_pooling_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_global_average_pooling_nwc_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_nwc_f16(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
+  xnn_operator_t global_average_pooling_op,
+  void* workspace,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_global_average_pooling_nwc_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_nwc_f32(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
+  xnn_operator_t global_average_pooling_op,
+  void* workspace,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
+  int8_t input_zero_point,
+  float input_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_nwc_qs8(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
+  xnn_operator_t global_average_pooling_op,
+  void* workspace,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* global_average_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_average_pooling_nwc_qu8(
+  xnn_operator_t global_average_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
+  xnn_operator_t global_average_pooling_op,
+  void* workspace,
+  const uint8_t* input,
+  uint8_t* output);
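For the nwc global pooling family above, the layout is N (batch) x W (width) x C (channels), and the reduction runs over W. The following reference loop is not from the diff; it is a hedged reading of what the f32 nwc global average pooling computes, with input_stride taken as the element stride between consecutive width positions and output_stride as the stride between batch outputs (both assumptions inferred from the parameter names):

#include <stddef.h>

static void global_avg_pool_nwc_f32_reference(
    size_t batch_size, size_t width, size_t channels,
    size_t input_stride, size_t output_stride,
    const float* input, float* output) {
  for (size_t n = 0; n < batch_size; n++) {
    for (size_t c = 0; c < channels; c++) {
      float sum = 0.0f;
      // Average each channel over the width dimension.
      for (size_t w = 0; w < width; w++) {
        sum += input[(n * width + w) * input_stride + c];
      }
      output[n * output_stride + c] = sum / (float)width;
    }
  }
}

The real operator additionally clamps the result to [output_min, output_max] and may use the workspace reported by the reshape call for intermediate accumulators.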
+
+enum xnn_status xnn_create_global_sum_pooling_nwc_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_sum_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_sum_pooling_nwc_f16(
+  xnn_operator_t global_sum_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_sum_pooling_nwc_f16(
+  xnn_operator_t global_sum_pooling_op,
+  void* workspace,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_global_sum_pooling_nwc_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* global_sum_pooling_op_out);
+
+enum xnn_status xnn_reshape_global_sum_pooling_nwc_f32(
+  xnn_operator_t global_sum_pooling_op,
+  size_t batch_size,
+  size_t width,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_global_sum_pooling_nwc_f32(
+  xnn_operator_t global_sum_pooling_op,
+  void* workspace,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_hardswish_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* hardswish_op_out);
+
+enum xnn_status xnn_reshape_hardswish_nc_f16(
+  xnn_operator_t hardswish_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_hardswish_nc_f16(
+  xnn_operator_t hardswish_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_hardswish_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* hardswish_op_out);
+
+enum xnn_status xnn_reshape_hardswish_nc_f32(
+  xnn_operator_t hardswish_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_hardswish_nc_f32(
+  xnn_operator_t hardswish_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_hardswish_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_leaky_relu_nc_f16(
+  float negative_slope,
+  uint32_t flags,
+  xnn_operator_t* leaky_relu_op_out);
+
+enum xnn_status xnn_reshape_leaky_relu_nc_f16(
+  xnn_operator_t leaky_relu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_leaky_relu_nc_f16(
+  xnn_operator_t leaky_relu_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_leaky_relu_nc_f32(
+  float negative_slope,
+  uint32_t flags,
+  xnn_operator_t* leaky_relu_op_out);
+
+enum xnn_status xnn_reshape_leaky_relu_nc_f32(
+  xnn_operator_t leaky_relu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_leaky_relu_nc_f32(
+  xnn_operator_t leaky_relu_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_leaky_relu_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  float negative_slope,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_leaky_relu_nc_qs8(
+  float negative_slope,
+  int8_t input_zero_point,
+  float input_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  uint32_t flags,
+  xnn_operator_t* leaky_relu_op_out);
+
+enum xnn_status xnn_reshape_leaky_relu_nc_qs8(
+  xnn_operator_t leaky_relu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_leaky_relu_nc_qs8(
+  xnn_operator_t leaky_relu_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_leaky_relu_nc_qu8(
+  float negative_slope,
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint32_t flags,
+  xnn_operator_t* leaky_relu_op_out);
+
+enum xnn_status xnn_reshape_leaky_relu_nc_qu8(
+  xnn_operator_t leaky_relu_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_leaky_relu_nc_qu8(
+  xnn_operator_t leaky_relu_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* max_pooling_op_out);
+
+enum xnn_status xnn_reshape_max_pooling2d_nhwc_f16(
+  xnn_operator_t max_pooling_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
+  xnn_operator_t max_pooling_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* max_pooling_op_out);
+
+enum xnn_status xnn_reshape_max_pooling2d_nhwc_f32(
+  xnn_operator_t max_pooling_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
+  xnn_operator_t max_pooling_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* max_pooling_op_out);
+
+enum xnn_status xnn_reshape_max_pooling2d_nhwc_s8(
+  xnn_operator_t max_pooling_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
+  xnn_operator_t max_pooling_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  uint32_t stride_height,
+  uint32_t stride_width,
+  uint32_t dilation_height,
+  uint32_t dilation_width,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* max_pooling_op_out);
+
+enum xnn_status xnn_reshape_max_pooling2d_nhwc_u8(
+  xnn_operator_t max_pooling_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
+  xnn_operator_t max_pooling_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_maximum_nd_f16(
+  uint32_t flags,
+  xnn_operator_t* maximum_op_out);
+
+enum xnn_status xnn_reshape_maximum_nd_f16(
+  xnn_operator_t maximum_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_maximum_nd_f16(
+  xnn_operator_t maximum_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_maximum_nd_f32(
+  uint32_t flags,
+  xnn_operator_t* maximum_op_out);
+
+enum xnn_status xnn_reshape_maximum_nd_f32(
+  xnn_operator_t maximum_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_maximum_nd_f32(
+  xnn_operator_t maximum_op,
+  const float* input1,
+  const float* input2,
+  float* output);
+
+enum xnn_status xnn_run_maximum_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_mean_nd_f16(
+  uint32_t flags,
+  xnn_operator_t* mean_op_out);
+
+enum xnn_status xnn_reshape_mean_nd_f16(
+  xnn_operator_t mean_op,
+  size_t num_reduction_axes,
+  const size_t* reduction_axes,
+  size_t num_input_dims,
+  const size_t* input_shape,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_mean_nd_f16(
+  xnn_operator_t mean_op,
+  void* workspace,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_mean_nd_f32(
+  uint32_t flags,
+  xnn_operator_t* mean_op_out);
+
+enum xnn_status xnn_reshape_mean_nd_f32(
+  xnn_operator_t mean_op,
+  size_t num_reduction_axes,
+  const size_t* reduction_axes,
+  size_t num_input_dims,
+  const size_t* input_shape,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_mean_nd_f32(
+  xnn_operator_t mean_op,
+  void* workspace,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_minimum_nd_f16(
+  uint32_t flags,
+  xnn_operator_t* minimum_op_out);
+
+enum xnn_status xnn_reshape_minimum_nd_f16(
+  xnn_operator_t minimum_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_minimum_nd_f16(
+  xnn_operator_t minimum_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_minimum_nd_f32(
+  uint32_t flags,
+  xnn_operator_t* minimum_op_out);
+
+enum xnn_status xnn_reshape_minimum_nd_f32(
+  xnn_operator_t minimum_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_minimum_nd_f32(
+  xnn_operator_t minimum_op,
+  const float* input1,
+  const float* input2,
+  float* output);
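Unlike the binary _nd operators, mean_nd above takes an explicit list of reduction axes at reshape time, plus the full input shape, and reports a workspace. A hedged sketch reducing a 2x3x4 tensor over axes {0, 2} (leaving 3 outputs); the shapes and the helpers xnn_initialize/xnn_run_operator/xnn_delete_operator are assumptions, as before:

#include <stdlib.h>
#include <xnnpack.h>

int mean_example(const float* input /* 2*3*4 elements */,
                 float* output /* 3 elements */) {
  if (xnn_initialize(NULL) != xnn_status_success) return 1;
  xnn_operator_t op = NULL;
  if (xnn_create_mean_nd_f32(/*flags=*/0, &op) != xnn_status_success) return 1;

  const size_t axes[] = {0, 2};        // reduce the outer and innermost dims
  const size_t shape[] = {2, 3, 4};
  size_t workspace_size = 0, workspace_alignment = 0;
  if (xnn_reshape_mean_nd_f32(op, 2, axes, 3, shape,
                              &workspace_size, &workspace_alignment,
                              /*threadpool=*/NULL) != xnn_status_success) return 1;

  // aligned_alloc (C11) needs size to be a multiple of alignment.
  size_t rounded = (workspace_size + workspace_alignment - 1)
                   / workspace_alignment * workspace_alignment;
  void* workspace = rounded ? aligned_alloc(workspace_alignment, rounded) : NULL;

  if (xnn_setup_mean_nd_f32(op, workspace, input, output) != xnn_status_success) return 1;
  enum xnn_status status = xnn_run_operator(op, NULL);
  xnn_delete_operator(op);
  free(workspace);
  return status == xnn_status_success ? 0 : 1;
}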
+
+enum xnn_status xnn_run_minimum_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_multiply_nd_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* multiply_op_out);
+
+enum xnn_status xnn_reshape_multiply_nd_f16(
+  xnn_operator_t multiply_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_multiply_nd_f16(
+  xnn_operator_t multiply_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_multiply_nd_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* multiply_op_out);
+
+enum xnn_status xnn_reshape_multiply_nd_f32(
+  xnn_operator_t multiply_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_multiply_nd_f32(
+  xnn_operator_t multiply_op,
+  const float* input1,
+  const float* input2,
+  float* output);
+
+enum xnn_status xnn_run_multiply_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_multiply_nd_qs8(
+  int8_t input1_zero_point,
+  float input1_scale,
+  int8_t input2_zero_point,
+  float input2_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* multiply_op_out);
+
+enum xnn_status xnn_reshape_multiply_nd_qs8(
+  xnn_operator_t multiply_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_multiply_nd_qs8(
+  xnn_operator_t multiply_op,
+  const int8_t* input1,
+  const int8_t* input2,
+  int8_t* output);
+
+enum xnn_status xnn_run_multiply_nd_qs8(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  int8_t input1_zero_point,
+  float input1_scale,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  int8_t input2_zero_point,
+  float input2_scale,
+  const int8_t* input1,
+  const int8_t* input2,
+  int8_t* output,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
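The quantized variants carry a zero point and scale per tensor; each stored value q represents scale * (q - zero_point), which is the usual affine-quantization convention and my assumption here. A one-shot sketch of the qs8 multiply with illustrative quantization parameters:

#include <stdint.h>
#include <xnnpack.h>

int mul_qs8_example(void) {
  if (xnn_initialize(NULL) != xnn_status_success) return 1;
  const size_t shape[] = {4};
  const int8_t a[4] = {2, 4, 8, 16};   // real values: 0.5 * q = 1, 2, 4, 8
  const int8_t b[4] = {4, 4, 4, 4};    // real values: 0.25 * q = 1, 1, 1, 1
  int8_t out[4];
  enum xnn_status status = xnn_run_multiply_nd_qs8(
      1, shape, /*input1_zero_point=*/0, /*input1_scale=*/0.5f,
      1, shape, /*input2_zero_point=*/0, /*input2_scale=*/0.25f,
      a, b, out,
      /*output_zero_point=*/0, /*output_scale=*/0.125f,
      /*output_min=*/-128, /*output_max=*/127,
      /*flags=*/0, /*threadpool=*/NULL);
  return status == xnn_status_success ? 0 : 1;
}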
+
+enum xnn_status xnn_create_multiply_nd_qu8(
+  uint8_t input1_zero_point,
+  float input1_scale,
+  uint8_t input2_zero_point,
+  float input2_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* multiply_op_out);
+
+enum xnn_status xnn_reshape_multiply_nd_qu8(
+  xnn_operator_t multiply_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_multiply_nd_qu8(
+  xnn_operator_t multiply_op,
+  const uint8_t* input1,
+  const uint8_t* input2,
+  uint8_t* output);
+
+enum xnn_status xnn_run_multiply_nd_qu8(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  uint8_t input1_zero_point,
+  float input1_scale,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  uint8_t input2_zero_point,
+  float input2_scale,
+  const uint8_t* input1,
+  const uint8_t* input2,
+  uint8_t* output,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_negate_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* negate_op_out);
+
+enum xnn_status xnn_reshape_negate_nc_f16(
+  xnn_operator_t negate_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_negate_nc_f16(
+  xnn_operator_t negate_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_negate_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* negate_op_out);
+
+enum xnn_status xnn_reshape_negate_nc_f32(
+  xnn_operator_t negate_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_negate_nc_f32(
+  xnn_operator_t negate_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_negate_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_prelu_nc_f16(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  const void* negative_slope,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* prelu_op_out);
+
+enum xnn_status xnn_reshape_prelu_nc_f16(
+  xnn_operator_t prelu_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_prelu_nc_f16(
+  xnn_operator_t prelu_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_prelu_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  const float* negative_slope,
+  uint32_t flags,
+  xnn_code_cache_t code_cache,
+  xnn_weights_cache_t weights_cache,
+  xnn_operator_t* prelu_op_out);
+
+enum xnn_status xnn_reshape_prelu_nc_f32(
+  xnn_operator_t prelu_op,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_prelu_nc_f32(
+  xnn_operator_t prelu_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f32(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
+  xnn_operator_t resize_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nchw_f16(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f16(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nchw_f16(
+  xnn_operator_t resize_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f16(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
+  xnn_operator_t resize_op,
+  void* workspace,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f32(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
+  xnn_operator_t resize_op,
+  void* workspace,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_s8(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
+  xnn_operator_t resize_op,
+  void* workspace,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
+  size_t output_height,
+  size_t output_width,
+  uint32_t flags,
+  xnn_operator_t* resize_op_out);
+
+enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_u8(
+  xnn_operator_t resize_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
+  xnn_operator_t resize_op,
+  void* workspace,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_rope_nthc_f16(
+  size_t max_tokens,
+  uint32_t flags,
+  xnn_operator_t* rope_op_out);
+
+enum xnn_status xnn_reshape_rope_nthc_f16(
+  xnn_operator_t rope_op,
+  size_t batch_size,
+  size_t tokens,
+  size_t heads,
+  size_t channels,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_rope_nthc_f16(
+  xnn_operator_t rope_op,
+  const void* input,
+  const void* weights,
+  void* output);
+
+enum xnn_status xnn_create_rope_nthc_f32(
+  size_t max_tokens,
+  uint32_t flags,
+  xnn_operator_t* rope_op_out);
+
+enum xnn_status xnn_reshape_rope_nthc_f32(
+  xnn_operator_t rope_op,
+  size_t batch_size,
+  size_t tokens,
+  size_t heads,
+  size_t channels,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_rope_nthc_f32(
+  xnn_operator_t rope_op,
+  const float* input,
+  const float* weights,
+  float* output);
+
+// N: batch size
+// H: number of heads
+// T: tokens (sequence length)
+// C: channels (head dimension)
+enum xnn_status xnn_create_scaled_dot_product_attention_nhtc_f16(
+  enum xnn_attention_logits_cap_type cap_type,
+  const void* cap_params,
+  uint32_t flags,
+  xnn_operator_t* attention_op_out);
+
+enum xnn_status xnn_reshape_scaled_dot_product_attention_nhtc_f16(
+  xnn_operator_t attention_op,
+  size_t batch_size,
+  size_t query_heads,
+  // Number of tokens in query.
+  size_t query_tokens,
+  size_t key_value_heads,
+  // Number of tokens in key/value. For self-attention, this is same as tokens.
+  size_t key_value_tokens,
+  size_t query_key_channels,
+  size_t value_channels,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+// Query is of dimension [batch_size, query_heads, query_tokens, channels].
+// Key and value are of dimension [batch_size, key_value_heads, key_value_tokens, channels].
+// Scale is of dimension [channels].
+// Mask is of dimension [query_tokens, key_value_tokens].
+enum xnn_status xnn_setup_scaled_dot_product_attention_nhtc_f16(
+  xnn_operator_t attention_op,
+  void* workspace,
+  const void* query,
+  const void* key,
+  const void* value,
+  const void* scale,
+  const void* mask,
+  void* output);
+
+// N: batch size
+// H: number of heads
+// T: tokens (sequence length)
+// C: channels (head dimension)
+enum xnn_status xnn_create_scaled_dot_product_attention_nhtc_f32(
+  enum xnn_attention_logits_cap_type cap_type,
+  const void* cap_params,
+  uint32_t flags,
+  xnn_operator_t* attention_op_out);
+
+enum xnn_status xnn_reshape_scaled_dot_product_attention_nhtc_f32(
+  xnn_operator_t attention_op,
+  size_t batch_size,
+  size_t query_heads,
+  // Number of tokens in query.
+  size_t query_tokens,
+  size_t key_value_heads,
+  // Number of tokens in key/value. For self-attention, this is same as tokens.
+  size_t key_value_tokens,
+  size_t query_key_channels,
+  size_t value_channels,
+  size_t* workspace_size,
+  size_t* workspace_alignment,
+  pthreadpool_t threadpool);
+
+// Query is of dimension [batch_size, query_heads, query_tokens, query_key_channels].
+// Key and value are of dimension [batch_size, key_value_heads, key_value_tokens, query_key_channels].
+// Scale is of dimension [query_key_channels].
+// Mask is of dimension [query_tokens, key_value_tokens].
+// Output is of dimension [batch_size, query_heads, query_tokens, value_channels].
+enum xnn_status xnn_setup_scaled_dot_product_attention_nhtc_f32(
+  xnn_operator_t attention_op,
+  void* workspace,
+  const float* query,
+  const float* key,
+  const float* value,
+  const float* scale,
+  const float* mask,
+  float* output);
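Putting the attention declarations together: per the N/H/T/C comments, reshape fixes the head/token geometry and reports a workspace, and setup binds query/key/value plus the per-channel scale and the additive mask. A hedged sketch for the f32 variant; the enum constant xnn_attention_logits_cap_type_none is assumed from elsewhere in this header (it is not shown in this excerpt), and all sizes are illustrative:

#include <stdlib.h>
#include <xnnpack.h>

// Self-attention: 1 batch, 4 heads, 16 tokens, 64 channels per head.
int attention_example(const float* q, const float* k, const float* v,
                      const float* scale /* [64] */,
                      const float* mask /* [16*16] */, float* out) {
  if (xnn_initialize(NULL) != xnn_status_success) return 1;
  xnn_operator_t op = NULL;
  if (xnn_create_scaled_dot_product_attention_nhtc_f32(
          xnn_attention_logits_cap_type_none, /*cap_params=*/NULL,
          /*flags=*/0, &op) != xnn_status_success) return 1;

  size_t workspace_size = 0, workspace_alignment = 0;
  if (xnn_reshape_scaled_dot_product_attention_nhtc_f32(
          op, /*batch_size=*/1, /*query_heads=*/4, /*query_tokens=*/16,
          /*key_value_heads=*/4, /*key_value_tokens=*/16,
          /*query_key_channels=*/64, /*value_channels=*/64,
          &workspace_size, &workspace_alignment,
          /*threadpool=*/NULL) != xnn_status_success) return 1;

  size_t rounded = (workspace_size + workspace_alignment - 1)
                   / workspace_alignment * workspace_alignment;
  void* workspace = rounded ? aligned_alloc(workspace_alignment, rounded) : NULL;

  if (xnn_setup_scaled_dot_product_attention_nhtc_f32(
          op, workspace, q, k, v, scale, mask, out) != xnn_status_success) return 1;
  enum xnn_status status = xnn_run_operator(op, NULL);
  xnn_delete_operator(op);
  free(workspace);
  return status == xnn_status_success ? 0 : 1;
}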
+
+enum xnn_status xnn_create_sigmoid_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* sigmoid_op_out);
+
+enum xnn_status xnn_reshape_sigmoid_nc_f16(
+  xnn_operator_t sigmoid_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_sigmoid_nc_f16(
+  xnn_operator_t sigmoid_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_sigmoid_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* sigmoid_op_out);
+
+enum xnn_status xnn_reshape_sigmoid_nc_f32(
+  xnn_operator_t sigmoid_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_sigmoid_nc_f32(
+  xnn_operator_t sigmoid_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_sigmoid_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_sigmoid_nc_qs8(
+  int8_t input_zero_point,
+  float input_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* sigmoid_op_out);
+
+enum xnn_status xnn_reshape_sigmoid_nc_qs8(
+  xnn_operator_t sigmoid_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_sigmoid_nc_qs8(
+  xnn_operator_t sigmoid_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_sigmoid_nc_qu8(
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* sigmoid_op_out);
+
+enum xnn_status xnn_reshape_sigmoid_nc_qu8(
+  xnn_operator_t sigmoid_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_sigmoid_nc_qu8(
+  xnn_operator_t sigmoid_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_slice_nd_x16(
+  uint32_t flags,
+  xnn_operator_t* slice_op_out);
+
+enum xnn_status xnn_reshape_slice_nd_x16(
+  xnn_operator_t slice_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* offsets,
+  const size_t* sizes,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_slice_nd_x16(
+  xnn_operator_t slice_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_slice_nd_x32(
+  uint32_t flags,
+  xnn_operator_t* slice_op_out);
+
+enum xnn_status xnn_reshape_slice_nd_x32(
+  xnn_operator_t slice_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* offsets,
+  const size_t* sizes,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_slice_nd_x32(
+  xnn_operator_t slice_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_run_slice_nd_x32(
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* offsets,
+  const size_t* sizes,
+  const void* input,
+  void* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_softmax_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* softmax_op_out);
+
+enum xnn_status xnn_reshape_softmax_nc_f16(
+  xnn_operator_t softmax_op,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_softmax_nc_f16(
+  xnn_operator_t softmax_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_softmax_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* softmax_op_out);
+
+enum xnn_status xnn_reshape_softmax_nc_f32(
+  xnn_operator_t softmax_op,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_softmax_nc_f32(
+  xnn_operator_t softmax_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_create_softmax_nc_qu8(
+  float input_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint32_t flags,
+  xnn_operator_t* softmax_op_out);
+
+enum xnn_status xnn_reshape_softmax_nc_qu8(
+  xnn_operator_t softmax_op,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_softmax_nc_qu8(
+  xnn_operator_t softmax_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x16(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_reshape_space_to_depth_nhwc_x16(
+  xnn_operator_t space_to_depth_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
+  xnn_operator_t space_to_depth_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x32(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_reshape_space_to_depth_nhwc_x32(
+  xnn_operator_t space_to_depth_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
+  xnn_operator_t space_to_depth_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_square_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* square_op_out);
+
+enum xnn_status xnn_reshape_square_nc_f16(
+  xnn_operator_t square_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_square_nc_f16(
+  xnn_operator_t square_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_square_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* square_op_out);
+
+enum xnn_status xnn_reshape_square_nc_f32(
+  xnn_operator_t square_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_square_nc_f32(
+  xnn_operator_t square_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_square_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_square_root_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* sqrt_op_out);
+
+enum xnn_status xnn_reshape_square_root_nc_f16(
+  xnn_operator_t sqrt_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_square_root_nc_f16(
+  xnn_operator_t sqrt_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_square_root_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* sqrt_op_out);
+
+enum xnn_status xnn_reshape_square_root_nc_f32(
+  xnn_operator_t sqrt_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_square_root_nc_f32(
+  xnn_operator_t sqrt_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_square_root_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_reciprocal_square_root_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* sqrt_op_out);
+
+enum xnn_status xnn_reshape_reciprocal_square_root_nc_f32(
+  xnn_operator_t sqrt_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_reciprocal_square_root_nc_f32(
+  xnn_operator_t sqrt_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_reciprocal_square_root_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_squared_difference_nd_f16(
+  uint32_t flags,
+  xnn_operator_t* squared_difference_op_out);
+
+enum xnn_status xnn_reshape_squared_difference_nd_f16(
+  xnn_operator_t squared_difference_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_squared_difference_nd_f16(
+  xnn_operator_t squared_difference_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_squared_difference_nd_f32(
+  uint32_t flags,
+  xnn_operator_t* squared_difference_op_out);
+
+enum xnn_status xnn_reshape_squared_difference_nd_f32(
+  xnn_operator_t squared_difference_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_squared_difference_nd_f32(
+  xnn_operator_t squared_difference_op,
+  const float* input1,
+  const float* input2,
+  float* output);
+
+enum xnn_status xnn_run_squared_difference_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_subtract_nd_f16(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* subtract_op_out);
+
+enum xnn_status xnn_reshape_subtract_nd_f16(
+  xnn_operator_t subtract_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_subtract_nd_f16(
+  xnn_operator_t subtract_op,
+  const void* input1,
+  const void* input2,
+  void* output);
+
+enum xnn_status xnn_create_subtract_nd_f32(
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  xnn_operator_t* subtract_op_out);
+
+enum xnn_status xnn_reshape_subtract_nd_f32(
+  xnn_operator_t subtract_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_subtract_nd_f32(
+  xnn_operator_t subtract_op,
+  const float* input1,
+  const float* input2,
+  float* output);
+
+enum xnn_status xnn_run_subtract_nd_f32(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  const float* input1,
+  const float* input2,
+  float* output,
+  float output_min,
+  float output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_subtract_nd_qs8(
+  int8_t input1_zero_point,
+  float input1_scale,
+  int8_t input2_zero_point,
+  float input2_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* subtract_op_out);
+
+enum xnn_status xnn_reshape_subtract_nd_qs8(
+  xnn_operator_t subtract_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_subtract_nd_qs8(
+  xnn_operator_t subtract_op,
+  const int8_t* input1,
+  const int8_t* input2,
+  int8_t* output);
+
+enum xnn_status xnn_run_subtract_nd_qs8(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  int8_t input1_zero_point,
+  float input1_scale,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  int8_t input2_zero_point,
+  float input2_scale,
+  const int8_t* input1,
+  const int8_t* input2,
+  int8_t* output,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_subtract_nd_qu8(
+  uint8_t input1_zero_point,
+  float input1_scale,
+  uint8_t input2_zero_point,
+  float input2_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* subtract_op_out);
+
+enum xnn_status xnn_reshape_subtract_nd_qu8(
+  xnn_operator_t subtract_op,
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_subtract_nd_qu8(
+  xnn_operator_t subtract_op,
+  const uint8_t* input1,
+  const uint8_t* input2,
+  uint8_t* output);
+
+enum xnn_status xnn_run_subtract_nd_qu8(
+  size_t num_input1_dims,
+  const size_t* input1_shape,
+  uint8_t input1_zero_point,
+  float input1_scale,
+  size_t num_input2_dims,
+  const size_t* input2_shape,
+  uint8_t input2_zero_point,
+  float input2_scale,
+  const uint8_t* input1,
+  const uint8_t* input2,
+  uint8_t* output,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_tanh_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* tanh_op_out);
+
+enum xnn_status xnn_reshape_tanh_nc_f16(
+  xnn_operator_t tanh_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_tanh_nc_f16(
+  xnn_operator_t tanh_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_tanh_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* tanh_op_out);
+
+enum xnn_status xnn_reshape_tanh_nc_f32(
+  xnn_operator_t tanh_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_tanh_nc_f32(
+  xnn_operator_t tanh_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_tanh_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_tanh_nc_qs8(
+  int8_t input_zero_point,
+  float input_scale,
+  int8_t output_zero_point,
+  float output_scale,
+  int8_t output_min,
+  int8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* tanh_op_out);
+
+enum xnn_status xnn_reshape_tanh_nc_qs8(
+  xnn_operator_t tanh_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_tanh_nc_qs8(
+  xnn_operator_t tanh_op,
+  const int8_t* input,
+  int8_t* output);
+
+enum xnn_status xnn_create_tanh_nc_qu8(
+  uint8_t input_zero_point,
+  float input_scale,
+  uint8_t output_zero_point,
+  float output_scale,
+  uint8_t output_min,
+  uint8_t output_max,
+  uint32_t flags,
+  xnn_operator_t* tanh_op_out);
+
+enum xnn_status xnn_reshape_tanh_nc_qu8(
+  xnn_operator_t tanh_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_tanh_nc_qu8(
+  xnn_operator_t tanh_op,
+  const uint8_t* input,
+  uint8_t* output);
+
+enum xnn_status xnn_create_transpose_nd_x8(
+  uint32_t flags,
+  xnn_operator_t* transpose_op_out);
+
+enum xnn_status xnn_reshape_transpose_nd_x8(
+  xnn_operator_t transpose_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_transpose_nd_x8(
+  xnn_operator_t transpose_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_run_transpose_nd_x8(
+  const void* input,
+  void* output,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_transpose_nd_x16(
+  uint32_t flags,
+  xnn_operator_t* transpose_op_out);
+
+enum xnn_status xnn_reshape_transpose_nd_x16(
+  xnn_operator_t transpose_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_transpose_nd_x16(
+  xnn_operator_t transpose_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_run_transpose_nd_x16(
+  const void* input,
+  void* output,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_transpose_nd_x32(
+  uint32_t flags,
+  xnn_operator_t* transpose_op_out);
+
+enum xnn_status xnn_reshape_transpose_nd_x32(
+  xnn_operator_t transpose_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_transpose_nd_x32(
+  xnn_operator_t transpose_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_run_transpose_nd_x32(
+  const void* input,
+  void* output,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_transpose_nd_x64(
+  uint32_t flags,
+  xnn_operator_t* transpose_op_out);
+
+enum xnn_status xnn_reshape_transpose_nd_x64(
+  xnn_operator_t transpose_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_transpose_nd_x64(
+  xnn_operator_t transpose_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_run_transpose_nd_x64(
+  const void* input,
+  void* output,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* output_perm,
+  uint32_t flags,
+  pthreadpool_t threadpool);
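The xN transpose (and slice/space-to-depth) operators are typed by element width rather than data type, so x32 moves any 4-byte elements, floats included. My reading of output_perm is that entry i names the input dimension that becomes output dimension i. A one-shot sketch transposing a 2x3 float matrix under that assumption:

#include <stddef.h>
#include <xnnpack.h>

int transpose_example(void) {
  if (xnn_initialize(NULL) != xnn_status_success) return 1;
  const float in[2][3] = {{1, 2, 3}, {4, 5, 6}};
  float out[3][2];
  const size_t shape[] = {2, 3};
  const size_t perm[] = {1, 0};  // output dim i comes from input dim perm[i]
  enum xnn_status status = xnn_run_transpose_nd_x32(
      in, out, 2, shape, perm, /*flags=*/0, /*threadpool=*/NULL);
  return status == xnn_status_success ? 0 : 1;  // expect out = {{1,4},{2,5},{3,6}}
}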
+
+enum xnn_status xnn_create_truncation_nc_f16(
+  uint32_t flags,
+  xnn_operator_t* truncation_op_out);
+
+enum xnn_status xnn_reshape_truncation_nc_f16(
+  xnn_operator_t truncation_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_truncation_nc_f16(
+  xnn_operator_t truncation_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_truncation_nc_f32(
+  uint32_t flags,
+  xnn_operator_t* truncation_op_out);
+
+enum xnn_status xnn_reshape_truncation_nc_f32(
+  xnn_operator_t truncation_op,
+  size_t batch_size,
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_truncation_nc_f32(
+  xnn_operator_t truncation_op,
+  const float* input,
+  float* output);
+
+enum xnn_status xnn_run_truncation_nc_f32(
+  size_t channels,
+  size_t input_stride,
+  size_t output_stride,
+  size_t batch_size,
+  const float* input,
+  float* output,
+  uint32_t flags,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_create_unpooling2d_nhwc_x32(
+  uint32_t input_padding_top,
+  uint32_t input_padding_right,
+  uint32_t input_padding_bottom,
+  uint32_t input_padding_left,
+  uint32_t pooling_height,
+  uint32_t pooling_width,
+  size_t channels,
+  size_t input_pixel_stride,
+  size_t output_pixel_stride,
+  uint32_t flags,
+  xnn_operator_t* unpooling_op_out);
+
+enum xnn_status xnn_reshape_unpooling2d_nhwc_x32(
+  xnn_operator_t unpooling_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
+  xnn_operator_t unpooling_op,
+  const void* input,
+  const uint32_t* index,
+  void* output);
+
+enum xnn_status xnn_create_slice_nd_x8(
+  uint32_t flags,
+  xnn_operator_t* slice_op_out);
+
+enum xnn_status xnn_reshape_slice_nd_x8(
+  xnn_operator_t slice_op,
+  size_t num_dims,
+  const size_t* input_shape,
+  const size_t* offsets,
+  const size_t* sizes,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_slice_nd_x8(
+  xnn_operator_t slice_op,
+  const void* input,
+  void* output);
+
+enum xnn_status xnn_create_space_to_depth_nhwc_x8(
+  uint32_t block_size,
+  uint32_t flags,
+  xnn_operator_t* space_to_depth_op_out);
+
+enum xnn_status xnn_reshape_space_to_depth_nhwc_x8(
+  xnn_operator_t space_to_depth_op,
+  size_t batch_size,
+  size_t input_height,
+  size_t input_width,
+  size_t input_channels,
+  size_t* output_height_out,
+  size_t* output_width_out,
+  size_t* output_channels_out,
+  pthreadpool_t threadpool);
+
+enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
+  xnn_operator_t space_to_depth_op,
+  const void* input,
+  void* output);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif