Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h +22 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h +46 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h +26 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h +183 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h +69 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h +53 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h +2 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h +153 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h +243 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h +26 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h +21 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h +28 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h +21 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h +27 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h +39 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h +39 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h +22 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h +28 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h +27 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h +30 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h +28 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h +27 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h +50 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h +28 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h +91 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h +22 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h +22 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h +44 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h +33 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h +39 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h +26 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h +22 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h +27 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h +30 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h +39 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h +28 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc
ADDED
Binary file (5.36 kB).
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+// Test these using #if AT_MKL_ENABLED(), not #ifdef, so that it's
+// obvious if you forgot to include Config.h
+// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined
+//
+// DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h
+
+#define AT_MKLDNN_ENABLED() 1
+#define AT_MKLDNN_ACL_ENABLED() 0
+#define AT_MKL_ENABLED() 1
+#define AT_MKL_SEQUENTIAL() 0
+#define AT_POCKETFFT_ENABLED() 0
+#define AT_NNPACK_ENABLED() 1
+#define CAFFE2_STATIC_LINK_CUDA() 0
+#define AT_BUILD_WITH_BLAS() 1
+#define AT_BUILD_WITH_LAPACK() 1
+#define AT_PARALLEL_OPENMP 1
+#define AT_PARALLEL_NATIVE 0
+#define AT_PARALLEL_NATIVE_TBB 0
+#define AT_BLAS_F2C() 0
+#define AT_BLAS_USE_CBLAS_DOT() 0
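Editor's note: the function-like form of these flags is what makes the recommended `#if` check fail loudly. An undefined identifier inside `#if` evaluates to 0, so a forgotten include turns `AT_MKL_ENABLED()` into `0()`, which is a preprocessor syntax error rather than a silent false. A minimal illustration (the constant name below is ours, not part of the header):

#include <ATen/Config.h>

// If the include above were missing, `#if AT_MKL_ENABLED()` would not
// silently evaluate to false -- it would fail to preprocess.
#if AT_MKL_ENABLED()
constexpr bool kAtenHasMKL = true;
#else
constexpr bool kAtenHasMKL = false;
#endif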
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h
ADDED
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <c10/macros/Macros.h>
+#include <memory>
+
+namespace at::functorch {
+
+// NOTE [functorch TLS in pytorch/pytorch]
+//
+// functorch lives out-of-tree. However, it has some TLS that needs to be
+// propagated. The solution for that is we store a pointer to the TLS
+// inside pytorch/pytorch and extend FuncTorchTLSBase inside functorch to
+// include whatever functorch needs.
+//
+// We need to store a pointer due to the indirection:
+// inside functorch, we will create a subclass of FuncTorchTLSBase called
+// FuncTorchTLSImpl that actually contains metadata, like the DynamicLayerStack.
+// FuncTorchTLSBase doesn't have any metadata because it hasn't been defined
+// yet.
+//
+// Here in pytorch/pytorch, we will pass around FuncTorchTLSBase*, but inside
+// functorch, we will assign a FuncTorchTLSImpl* to the FuncTorchTLSBase*.
+// We can't directly pass around FuncTorchTLSBase (without a pointer) because
+// FuncTorchTLSImpl does not fit inside a FuncTorchTLSBase by virtue of having
+// more elements.
+struct TORCH_API FuncTorchTLSBase {
+  virtual ~FuncTorchTLSBase() = default;
+  virtual std::unique_ptr<FuncTorchTLSBase> deepcopy() const = 0;
+
+  virtual int64_t checkSupportsSingleLevelAutogradFunction() const = 0;
+  virtual void checkSupportsCppAutogradFunction() const = 0;
+  virtual void checkSupportsInplaceRequiresGrad() const = 0;
+  virtual void checkSupportsRetainGrad() const = 0;
+};
+
+// returns a deep copy of the functorch TLS
+TORCH_API std::unique_ptr<FuncTorchTLSBase> getCopyOfFuncTorchTLS();
+
+// sets the functorch TLS. always does a deep copy.
+TORCH_API void setFuncTorchTLS(
+    const std::shared_ptr<const FuncTorchTLSBase>& state);
+
+// get a mutable reference to the functorch TLS
+TORCH_API std::unique_ptr<FuncTorchTLSBase>& functorchTLSAccessor();
+
+} // namespace at::functorch
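Editor's note: the NOTE above describes the subclass-and-register pattern without showing it. A hedged sketch of what an out-of-tree subclass could look like; the type name and the extra member are illustrative, not the real FuncTorchTLSImpl:

struct DemoFuncTorchTLS : public at::functorch::FuncTorchTLSBase {
  std::unique_ptr<at::functorch::FuncTorchTLSBase> deepcopy() const override {
    return std::make_unique<DemoFuncTorchTLS>(*this);
  }
  int64_t checkSupportsSingleLevelAutogradFunction() const override {
    return 0;
  }
  void checkSupportsCppAutogradFunction() const override {}
  void checkSupportsInplaceRequiresGrad() const override {}
  void checkSupportsRetainGrad() const override {}

  int64_t dynamic_layer_depth = 0; // stand-in for real functorch metadata
};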
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h
ADDED
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <c10/core/impl/LocalDispatchKeySet.h>
+
+namespace at::impl {
+
+// VmapMode contains a thread local count of how many nested vmaps
+// we are currently inside. That number is known as the `vmap level`.
+// VmapMode is used in the implementation of the Python `torch.vmap` API.
+//
+// NOTE: this is NOT the c++ api for torch.vmap. That doesn't exist yet.
+
+struct TORCH_API VmapMode {
+  // Returns the vmap level, aka the count of how many nested vmaps we're in.
+  static int64_t current_vmap_level();
+
+  // Increments the count of nested vmaps. If this causes the vmap level to be
+  // greater than 0, then it enables DispatchKey::VmapMode on all tensors.
+  static int64_t increment_nesting();
+
+  // Decrements the count of nested vmaps. If this causes the vmap level to be
+  // equal to 0, then it disables DispatchKey::VmapMode on all tensors.
+  static int64_t decrement_nesting();
+};
+
+} // namespace at::impl
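Editor's note: every increment must be matched by a decrement even on error paths, so callers would typically pair the two in an RAII guard. A minimal sketch; the guard type is ours, not part of this header:

struct VmapLevelGuard {
  VmapLevelGuard() { at::impl::VmapMode::increment_nesting(); }
  ~VmapLevelGuard() { at::impl::VmapMode::decrement_nesting(); }
  VmapLevelGuard(const VmapLevelGuard&) = delete;
  VmapLevelGuard& operator=(const VmapLevelGuard&) = delete;
};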
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h
ADDED
@@ -0,0 +1,183 @@
+#pragma once
+
+#include <ATen/LegacyBatchedTensorImpl.h>
+#include <ATen/core/IListRef.h>
+
+namespace at {
+
+// This file contains abstractions used for transforming *logical* vmap
+// arguments into *physical* arguments. (Keep reading for definitions of these
+// terms).
+
+// NOTE: [Logical vs physical args]
+// Consider the following vmap.
+//   vmap(vmap(func, in_dims=(2,)), in_dims=(0,))(torch.ones(2, 3, 4))
+// This would produce a BatchedTensor wrapping a Tensor of size [2, 3, 4],
+// with batch dims 0 and 2:
+//   BatchedTensor(ones(2, 3, 4), bdims=[(lvl=1,dim=0),(lvl=2,dim=2)])
+//
+// We say the *logical* view of the tensor has size [3] -- tensors inside
+// `func` appear to have size [3].
+// However, the *physical* underlying tensor (the one passed to vmap) has size
+// [2, 3, 4].
+//
+// This notion of logical vs physical also extends to non-tensor arguments.
+// Consider the previous tensor; let's assume the user called
+// `torch.sum(tensor, dim=0)` inside of `func`. Then the logical
+// dimension they are reducing over is dim 0 but the physical dim is dim 1
+// (the first non-batch dimension).
+
+// Forward declared; see NOTE: [What is a VmapPhysicalView?]
+struct VmapPhysicalView;
+
+// Most PyTorch operators take 4 or fewer inputs.
+constexpr int64_t kVmapTransformStaticInputSize = 4;
+using VmapPhysicalViewVec =
+    SmallVector<VmapPhysicalView, kVmapTransformStaticInputSize>;
+
+// PyTorch generally advertises good performance for <= 5 dims
+// (see ATen/core/DimVector.h). We add a few extra dims (~3) for vmap
+// dimensions to get 8. Adjust this number as necessary.
+constexpr int64_t kVmapStaticDimVecSize = 8;
+using VmapDimVector = SmallVector<int64_t, kVmapStaticDimVecSize>;
+using VmapSymDimVector = SmallVector<c10::SymInt, kVmapStaticDimVecSize>;
+
+// NOTE: [What is a VmapTransform?]
+// A *VmapTransform* converts logical views of tensors to physical views.
+//
+// Batching rules use VmapTransforms to convert logical arguments to
+// physical arguments, then call one or more at:: operators that handle the
+// physical arguments, and then convert the physical result back to a logical
+// argument.
+
+// VmapTransform for operators that take tensors with multiple batch dims.
+// Given one or more logical views on Tensors, `logicalToPhysical`
+// permutes all of the batch dims to the front of the tensor, aligns
+// and expands the batch dims to match each other (according to their `level`),
+// and returns a VmapPhysicalView on the tensor(s).
+struct TORCH_API MultiBatchVmapTransform {
+  static VmapPhysicalView logicalToPhysical(const Tensor& logical_tensor);
+  static VmapPhysicalViewVec logicalToPhysical(ITensorListRef logical_tensors);
+};
+
+// VmapTransform for operators that broadcast all inputs.
+// Given some logical views on Tensors, `logicalToPhysical`:
+// - permutes all of the batch dims to the front of the tensors
+// - aligns all the batch dims to the collective levels of all of the tensors.
+//   If a tensor does not have a batch dim for a vmap level, then it receives
+//   a size-one dimension for said level.
+// - aligns the non-batch dims to have the same dimensionality, adding extra
+//   size-1 dimensions in between the batch dimensions and the non-batch
+//   dimensions so that the batch dimensions are lined up from the right.
+//
+// For example: given inputs of size (B, 2) and (B, 3, 2) where B is the batch
+// dimension, BroadcastingVmapTransform returns VmapPhysicalViews that wrap
+// tensors of size (B, 1, 2) and (B, 3, 2).
+//
+// Given inputs of size (B, 2) and (2,), BroadcastingVmapTransform returns
+// VmapPhysicalViews wrapping tensors of size (B, 2) and (1, 2). We don't
+// actually *need* to return a tensor of size (1, 2) for the second tensor
+// because the broadcasting operation takes care of that for us, but we do
+// it anyways to keep things simple.
+struct TORCH_API BroadcastingVmapTransform {
+  static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors);
+};
+
+// Forward declared; if you're reading this file head to toe, don't worry about
+// it yet.
+struct VmapPhysicalToLogicalMap;
+
+// NOTE: [What is a VmapPhysicalView?]
+// VmapPhysicalView represents a physical view on a Tensor.
+//
+// One can use it to further convert logical dimension indices, logical shapes,
+// and more to their physical variants, or convert a new (physical) tensor into
+// a logical BatchedTensor. (TODO(rzou): some of these are not yet implemented).
+//
+// VmapPhysicalView stores a physical tensor with all of its batch dimensions at
+// the front and some levels that correspond to said batch dimensions.
+//
+// The levels bitset specifies which vmap levels correspond to the batch
+// dimensions at the front of the tensor. In particular, the number of set bits
+// corresponds to the number of batch dimensions on `tensor` and the rightmost
+// bit of `levels` specifies the maximum number of nested vmaps we are in at
+// this point in time.
+// For example, given:
+//   physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5, 6), levels={1, 3})
+//
+// The rightmost set bit of `levels` is 3, indicating that the number of nested
+// vmaps is less than or equal to 3.
+//   bitset: 010100
+//               ^
+//               |
+//   levels: 012345
+struct TORCH_API VmapPhysicalView {
+  VmapPhysicalView(Tensor&& tensor, std::bitset<kVmapNumLevels> levels)
+      : levels_(levels), tensor_(std::move(tensor)) {
+    TORCH_INTERNAL_ASSERT(!isBatchedTensor(tensor_));
+  }
+
+  Tensor& tensor() {
+    return tensor_;
+  }
+  const Tensor& tensor() const {
+    return tensor_;
+  }
+
+  // Maps logical dim indices to physical dim indices. Also does dim wrapping.
+  //
+  // For example, given:
+  //   physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5), levels={1, 3})
+  //
+  // Then physical_view.getPhysicalDims({0, 1}) returns {2, 3}.
+  // This is because the size of levels tells us that the first two dimensions
+  // of `tensor_` are batch dimensions, so a logical dim of `n` is actually
+  // a physical dim of `n + 2`.
+  VmapDimVector getPhysicalDims(OptionalIntArrayRef logical_dims) const;
+  int64_t getPhysicalDim(int64_t logical_dim) const;
+
+  // Returns a VmapPhysicalToLogicalMap object. This can be used for
+  // mapping a physical tensor to a new logical tensor (BatchedTensor).
+  VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const;
+
+  // Maps a logical shape to a physical shape by pre-pending the batch
+  // sizes to the logical shape.
+  VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const;
+
+  int64_t numBatchDims() const;
+
+ private:
+  int64_t numLogicalDims() const;
+
+  std::bitset<kVmapNumLevels> levels_;
+  Tensor tensor_;
+};
+
+// Convenience struct used for mapping a physical tensor (a non-BatchedTensor)
+// to a logical one (BatchedTensor). It holds some levels that are used to do
+// the mapping and assumes that the batch dimensions in the physical tensor all
+// occur at the front of the tensor.
+struct TORCH_API VmapPhysicalToLogicalMap {
+  VmapPhysicalToLogicalMap(std::bitset<kVmapNumLevels> levels)
+      : levels_(levels) {}
+
+  // Maps a physical tensor to a new logical tensor (BatchedTensor).
+  // Assumes that all of the "batch dimensions" are at the front
+  // of the physical tensor. For example, given:
+  // - x = rank-4 Tensor with size 2, 3, 5, 7
+  // - levels = (2, 4)
+  // Returns:
+  // - BatchedTensor(x, bdims=[(dim=0,lvl=2), (dim=1,lvl=4)])
+  Tensor apply(const Tensor& physical_tensor) const;
+
+  // Given a vector of physical tensors,
+  // 1. maps each tensor to a new logical tensor. Assumes that all of the
+  //    "batch dimensions" are at the front of the physical tensors.
+  // 2. stores the new logical tensors back into the passed-in vector. This is
+  //    to avoid additional dynamic allocations.
+  void applyInplace(std::vector<Tensor>& physical_tensors) const;
+
+  std::bitset<kVmapNumLevels> levels_;
+};
+
+} // namespace at
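Editor's note: the workflow described in NOTE: [What is a VmapTransform?] -- logical to physical, call an at:: operator, map back -- looks roughly like this inside a batching rule. A hedged sketch; the signature is illustrative, and real rules live in the legacy batching registrations:

at::Tensor sum_batching_rule_sketch(const at::Tensor& logical, int64_t dim) {
  // 1. Convert the logical (batched) argument to a physical view.
  auto physical = at::MultiBatchVmapTransform::logicalToPhysical(logical);
  // 2. Run the real operator on the physical tensor, translating dims.
  auto result = physical.tensor().sum(physical.getPhysicalDim(dim));
  // 3. Re-wrap the physical result as a logical BatchedTensor.
  return physical.getPhysicalToLogicalMap().apply(result);
}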
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h
ADDED
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <c10/core/ScalarType.h>
+#include <c10/util/BFloat16.h>
+#include <c10/util/Exception.h>
+#include <c10/util/Float8_e4m3fn.h>
+#include <c10/util/Float8_e4m3fnuz.h>
+#include <c10/util/Float8_e5m2.h>
+#include <c10/util/Float8_e5m2fnuz.h>
+#include <c10/util/Half.h>
+
+namespace at {
+
+// For FP16 or BFloat16 inputs, ops should perform internal math in FP32.
+template <typename scalar_t>
+struct OpMathType {
+  using type = scalar_t;
+};
+template <>
+struct OpMathType<at::Half> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::BFloat16> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e5m2> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e4m3fn> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e5m2fnuz> {
+  using type = float;
+};
+template <>
+struct OpMathType<at::Float8_e4m3fnuz> {
+  using type = float;
+};
+template <>
+struct OpMathType<c10::complex<Half>> {
+  using type = c10::complex<float>;
+};
+
+template <typename T>
+using opmath_type = typename OpMathType<T>::type;
+
+namespace {
+
+inline c10::ScalarType toOpMathType(const c10::ScalarType type) {
+  switch (type) {
+#define DEFINE_CASE(scalar_t, TypeNum) \
+  case ScalarType::TypeNum:            \
+    return CppTypeToScalarType<at::opmath_type<scalar_t>>::value;
+
+    AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE)
+#undef DEFINE_CASE
+
+    default:
+      TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type);
+  }
+}
+
+} // namespace
+
+} // namespace at
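Editor's note: the pattern this header enables is to accumulate reduced-precision inputs in their opmath type and cast back once at the end. A minimal sketch; the function name is ours:

template <typename scalar_t>
scalar_t mean_via_opmath(const scalar_t* data, int64_t n) {
  // float for Half/BFloat16/Float8 inputs; scalar_t itself otherwise.
  using opmath_t = at::opmath_type<scalar_t>;
  opmath_t acc = opmath_t(0);
  for (int64_t i = 0; i < n; ++i) {
    acc += static_cast<opmath_t>(data[i]);
  }
  return static_cast<scalar_t>(acc / static_cast<opmath_t>(n));
}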
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h
ADDED
The diff for this file is too large to render.
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h
ADDED
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <ATen/Tensor.h>
+#include <c10/core/Scalar.h>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+#include <ATen/ops/scalar_tensor.h>
+#endif
+
+namespace at::detail {
+// When filling a number into a 1-element CPU tensor, we want to skip
+// everything else and manipulate the data ptr directly.
+// Ideally this fast pass should be implemented in TensorIterator,
+// but we also want to skip compute_types, which is not avoidable
+// in TensorIterator for now.
+Tensor& scalar_fill(Tensor& self, const Scalar& value);
+TORCH_API Tensor scalar_tensor_static(
+    const Scalar& s,
+    c10::optional<ScalarType> dtype_opt,
+    c10::optional<Device> device_opt);
+} // namespace at::detail
+
+// This is in the c10 namespace because we use ADL to find the functions in it.
+namespace c10 {
+
+// FIXME: this should be (and was) Scalar::toTensor, but there is currently no
+// way to implement this without going through Derived Types (which are not
+// part of core).
+inline at::Tensor scalar_to_tensor(
+    const Scalar& s,
+    const Device device = at::kCPU) {
+  // This is the fast track we have for CPU scalar tensors.
+  if (device == at::kCPU) {
+    return at::detail::scalar_tensor_static(s, s.type(), at::kCPU);
+  }
+  return at::scalar_tensor(s, at::device(device).dtype(s.type()));
+}
+
+} // namespace c10
+
+namespace at::native {
+
+inline Tensor wrapped_scalar_tensor(
+    const Scalar& scalar,
+    const Device device = at::kCPU) {
+  auto tensor = scalar_to_tensor(scalar, device);
+  tensor.unsafeGetTensorImpl()->set_wrapped_number(true);
+  return tensor;
+}
+
+} // namespace at::native
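Editor's note: a sketch of how a kernel might use `wrapped_scalar_tensor` so a Scalar operand participates in type promotion as a wrapped number rather than bumping the result dtype. The function name is ours:

at::Tensor add_scalar_sketch(const at::Tensor& self, const at::Scalar& other) {
  // 0-dim tensor flagged as a wrapped number: it promotes weakly, so
  // Half + 1.0 stays Half instead of becoming Float.
  auto other_tensor = at::native::wrapped_scalar_tensor(other);
  return self.add(other_tensor);
}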
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h
ADDED
@@ -0,0 +1,2 @@
+#pragma once
+#include <c10/core/Storage.h>
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h
ADDED
@@ -0,0 +1,153 @@
+#pragma once
+
+#include <ATen/core/IListRef.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/TensorImpl.h>
+#include <c10/core/WrapDimMinimal.h>
+#include <c10/util/irange.h>
+
+namespace at {
+
+// if dim_post_expr is 0 and wrap_scalar is true, then dim must be in the
+// range [-1, 0]. This is a special case for scalar tensors and manifests in
+// e.g. torch.sum(scalar_tensor, 0). Otherwise, dim should be in the range
+// [-dim_post_expr, dim_post_expr-1].
+using c10::maybe_wrap_dim;
+
+inline int64_t maybe_wrap_dim(int64_t dim, TensorImpl* tensor) {
+  return maybe_wrap_dim(dim, tensor->dim());
+}
+
+inline int64_t maybe_wrap_dim(int64_t dim, TensorList tensors) {
+  if (tensors.empty()) {
+    // can't wrap empty TensorList; rely on underlying implementation to throw
+    // error if necessary.
+    return dim;
+  }
+  return maybe_wrap_dim(dim, tensors[0].dim());
+}
+
+inline int64_t maybe_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<int64_t>>& tensor_sizes) {
+  if (tensor_sizes.empty()) {
+    // can't wrap empty list; rely on underlying implementation to throw error
+    // if necessary
+    return dim;
+  }
+  return maybe_wrap_dim(dim, tensor_sizes[0].size());
+}
+
+// Given an array of dimensions `dims` of length `ndims`, this function "wraps"
+// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
+// specified using negative indices.
+//
+// Additionally, if `wrap_scalars` is true then scalar tensors with rank 0 will
+// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised
+// for dimensions not in the range [-dim_post_expr, dim_post_expr).
+inline void maybe_wrap_dims_n(
+    int64_t* dims,
+    int64_t ndims,
+    int64_t dim_post_expr,
+    bool wrap_scalars = true) {
+  if (dim_post_expr <= 0) {
+    if (wrap_scalars) {
+      dim_post_expr = 1; // this will make range [-1, 0]
+    } else {
+      TORCH_CHECK_INDEX(
+          ndims == 0,
+          "Dimension specified as ",
+          dims[0],
+          " but tensor has no dimensions");
+      return;
+    }
+  }
+  int64_t min = -dim_post_expr;
+  int64_t max = dim_post_expr - 1;
+  for (const auto i : c10::irange(ndims)) {
+    auto& dim = dims[i];
+    if (dim < min || dim > max) {
+      TORCH_CHECK_INDEX(
+          false,
+          "Dimension out of range (expected to be in range of [",
+          min,
+          ", ",
+          max,
+          "], but got ",
+          dim,
+          ")");
+    }
+    if (dim < 0)
+      dim += dim_post_expr;
+  }
+}
+
+// Given a contiguous container of dimensions `dims`, this function "wraps"
+// each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
+// specified using negative indices.
+//
+// Additionally, if `wrap_scalars` is true then scalar tensors with rank 0 will
+// allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised
+// for dimensions not in the range [-dim_post_expr, dim_post_expr).
+template <typename Container>
+inline void maybe_wrap_dims(
+    Container& dims,
+    int64_t dim_post_expr,
+    bool wrap_scalars = true) {
+  return maybe_wrap_dims_n(
+      dims.data(), dims.size(), dim_post_expr, wrap_scalars);
+}
+
+// previously, size [0] tensors were the only possible empty tensors; thus, it
+// wasn't possible to cat empty tensors unless all the other tensors were
+// 1-dimensional, so we allowed these tensors to be "skipped" (both for wrap
+// dimension behavior and dimension size checking). We maintain this behavior
+// for backwards compatibility, but only for this specific size (i.e. other
+// empty sizes are not skipped).
+template <typename T>
+inline int64_t _legacy_cat_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<T>>& tensor_sizes) {
+  for (auto& sizes : tensor_sizes) {
+    if (sizes.size() == 1 && sizes[0] == 0) {
+      continue;
+    }
+    return maybe_wrap_dim(dim, sizes.size());
+  }
+  return dim;
+}
+
+inline int64_t legacy_cat_wrap_dim(
+    int64_t dim,
+    const std::vector<std::vector<int64_t>>& tensor_sizes) {
+  return _legacy_cat_wrap_dim<int64_t>(dim, tensor_sizes);
+}
+
+inline int64_t legacy_cat_wrap_dim_symint(
+    int64_t dim,
+    const std::vector<std::vector<c10::SymInt>>& tensor_sizes) {
+  return _legacy_cat_wrap_dim<c10::SymInt>(dim, tensor_sizes);
+}
+
+inline int64_t legacy_cat_wrap_dim(
+    int64_t dim,
+    const MaterializedITensorListRef& tensors) {
+  for (const Tensor& tensor : tensors) {
+    if (tensor.dim() == 1 && tensor.sizes()[0] == 0) {
+      continue;
+    }
+    return maybe_wrap_dim(dim, tensor.dim());
+  }
+  return dim;
+}
+
+// wrap negative dims in a vector
+inline void wrap_all_dims(
+    std::vector<int64_t>& dims_to_wrap,
+    int64_t tensor_total_dims) {
+  for (const auto i : c10::irange(dims_to_wrap.size())) {
+    dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims);
+  }
+}
+
+} // namespace at
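Editor's note: a worked example of the wrapping rule for a rank-4 tensor; the demo function is ours:

#include <ATen/WrapDimUtils.h>

void wrap_dim_demo() {
  // Negative dims wrap modulo the rank: -1 -> 3 for a rank-4 tensor.
  int64_t a = at::maybe_wrap_dim(-1, /*dim_post_expr=*/4); // a == 3
  int64_t b = at::maybe_wrap_dim(2, /*dim_post_expr=*/4);  // b == 2
  // at::maybe_wrap_dim(4, 4) would throw an IndexError: the valid range
  // is [-4, 3].
  (void)a;
  (void)b;
}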
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h
ADDED
@@ -0,0 +1,243 @@
+#pragma once
+
+#include <c10/util/irange.h>
+
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace at::jit {
+
+// A template environment is a mapping from template variable names, e.g.,
+// identifier (corresponding to $identifier) to their expansions.
+//
+// This template environment supports storing strings, numbers and lists
+// of strings, and can be chained together (so that lookup proceeds in
+// the top level environment, and then recurses into a parent
+// environment if the key is not found.)
+struct TemplateEnv {
+  TemplateEnv() = default;
+  TemplateEnv(TemplateEnv& parent) : parent(&parent) {}
+
+  using string_list = std::vector<std::string>;
+
+  // Add a string 'v' to the map at key 'k'.
+  void s(const std::string& k, const std::string& v) {
+    strings_[k] = v;
+    lists_.erase(k);
+  }
+
+  // Add a number 'v' to the map at key 'k'
+  template <typename T>
+  void d(const std::string& k, const T& v) {
+    strings_[k] = c10::to_string(v);
+    lists_.erase(k);
+  }
+
+  // Retrieve the string representation of the value stored at 'k' from the map.
+  // Raises an exception if the key is not found.
+  const std::string& s(const std::string& k) const {
+    if (strings_.count(k) == 0) {
+      if (parent) {
+        return parent->s(k);
+      }
+      notFound(k);
+    }
+    return strings_.at(k);
+  }
+
+  // Store a list of strings 'v' in the map at 'k'.
+  void v(const std::string& k, const string_list& v) {
+    lists_[k] = v;
+    strings_.erase(k);
+  }
+
+  // Retrieve a list of strings stored at 'k' from the map.
+  // Raises an exception if the key is not found.
+  const string_list& v(const std::string& k) const {
+    if (lists_.count(k) == 0) {
+      if (parent) {
+        return parent->v(k);
+      }
+      notFound(k);
+    }
+    return lists_.at(k);
+  }
+
+  // Test if the value stored at 'k' is a string (as opposed to a list.)
+  bool keyIsString(const std::string& k) const {
+    if (strings_.count(k) > 0)
+      return true;
+    if (lists_.count(k) > 0)
+      return false;
+    if (parent)
+      return parent->keyIsString(k);
+    notFound(k);
+  }
+
+ private:
+  [[noreturn]] void notFound(const std::string& k) const {
+    std::stringstream ss;
+    ss << "key not found: " << k;
+    throw std::logic_error(ss.str());
+  }
+
+  std::unordered_map<std::string, std::string> strings_;
+  std::unordered_map<std::string, string_list> lists_;
+  TemplateEnv* parent{nullptr};
+};
+
+/*
+# Match $identifier or ${identifier} and replace with the value in env.
+# If this identifier is at the beginning of whitespace on a line
+# and its value is a list then it is treated as
+# block substitution by indenting all lines of all elements.
+# If the identifier is on a line starting with non-whitespace and a list
+# then it is comma separated. ${,foo} will insert a comma before the list
+# if this list is not empty and ${foo,} will insert one after.
+*/
+struct CodeTemplate {
+  /* implicit */ CodeTemplate(std::string t) : template_text(std::move(t)) {}
+
+  std::string format(const TemplateEnv& env) const {
+    std::stringstream out;
+    size_t pos = 0;
+    size_t indent = 0;
+    bool all_whitespace = true;
+    while (pos < template_text.size()) {
+      char c = template_text[pos];
+      if (c == '$') {
+        std::stringstream kss;
+        // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
+        bool comma_before;
+        // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
+        bool comma_after;
+        size_t new_pos = parseKey(pos, kss, comma_before, comma_after);
+        std::string k = kss.str();
+        bool is_string = env.keyIsString(k);
+        if (all_whitespace) {
+          if (is_string)
+            emitStringWithIndents(out, indent, env.s(k));
+          else
+            emitLinesIndented(out, indent, env.v(k));
+        } else {
+          if (is_string)
+            out << env.s(k);
+          else
+            emitCommaSeparatedList(out, env.v(k), comma_before, comma_after);
+        }
+        all_whitespace = false;
+        pos = new_pos;
+      } else {
+        out << c;
+        if (!isspace(c))
+          all_whitespace = false;
+        indent++;
+        if (c == '\n') {
+          indent = 0;
+          all_whitespace = true;
+        }
+        pos++;
+      }
+    }
+    return out.str();
+  }
+
+ private:
+  using string_list = std::vector<std::string>;
+  char charAt(size_t p) const {
+    if (p >= template_text.size())
+      throw std::logic_error("EOS found in key");
+    return template_text[p];
+  }
+  size_t parseKey(
+      size_t pos,
+      std::ostream& k,
+      bool& comma_before,
+      bool& comma_after) const {
+    comma_before = false;
+    comma_after = false;
+    pos++;
+    if (charAt(pos) == '{') {
+      pos++;
+      if (charAt(pos) == ',') {
+        comma_before = true;
+        pos++;
+      }
+      pos = parseIdent(pos, k);
+      if (charAt(pos) == ',') {
+        comma_after = true;
+        pos++;
+      }
+      if (charAt(pos) != '}')
+        throw std::logic_error("missing terminating '}'");
+      pos++;
+      return pos;
+    } else {
+      return parseIdent(pos, k);
+    }
+  }
+  size_t parseIdent(size_t pos, std::ostream& k) const {
+    while (pos < template_text.size() &&
+           (isalnum(template_text[pos]) || template_text[pos] == '_')) {
+      k << template_text[pos];
+      pos++;
+    }
+    return pos;
+  }
+  void emitCommaSeparatedList(
+      std::ostream& out,
+      const string_list& strings,
+      bool comma_before,
+      bool comma_after) const {
+    if (comma_before && !strings.empty())
+      out << ", ";
+    for (const auto i : c10::irange(strings.size())) {
+      if (i > 0)
+        out << ", ";
+      out << strings[i];
+    }
+    if (comma_after && !strings.empty())
+      out << ", ";
+  }
+  // These indentation functions follow the convention that they never emit
+  // leading or trailing newlines when the input string does not have leading
+  // or trailing newlines. It's the responsibility of the calling function
+  // to indent correctly in the context.
+  void emitIndent(std::ostream& out, size_t indent) const {
+    for (C10_UNUSED const auto i : c10::irange(indent)) {
+      out << " ";
+    }
+  }
+  void emitStringWithIndents(
+      std::ostream& out,
+      size_t indent,
+      const std::string& str) const {
+    for (auto c : str) {
+      out << c;
+      if (c == '\n') {
+        emitIndent(out, indent);
+      }
+    }
+  }
+  void emitLinesIndented(
+      std::stringstream& out,
+      size_t indent,
+      const string_list& strings) const {
+    for (const auto i : c10::irange(strings.size())) {
+      if (i > 0)
+        emitIndent(out, indent);
+      emitStringWithIndents(out, indent, strings[i]);
+      if (i + 1 != strings.size())
+        out << "\n";
+    }
+  }
+  std::string template_text;
+};
+
+static inline std::string format(const std::string& fmt, TemplateEnv& env) {
+  return CodeTemplate(fmt).format(env);
+}
+
+} // namespace at::jit
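Editor's note: a small usage example of TemplateEnv/CodeTemplate; the demo function is ours. Per the block comment above, list values substituted on a line starting with non-whitespace are comma separated:

std::string code_template_demo() {
  at::jit::TemplateEnv env;
  env.s("name", "my_kernel");
  env.d("n", 128);
  env.v("args", {"float* out", "const float* in"});
  // ${args} follows non-whitespace, so the list is comma separated.
  at::jit::CodeTemplate t("void ${name}(${args}) { /* n = ${n} */ }");
  return t.format(env);
  // -> "void my_kernel(float* out, const float* in) { /* n = 128 */ }"
}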
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h
ADDED
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _adaptive_avg_pool2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size);
+TORCH_API at::Tensor & _adaptive_avg_pool2d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out);
+TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size);
+TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
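Editor's note: these generated dispatch headers follow a naming convention: `_out` takes the output tensor as the first argument (the C++ API style), while `_outf` keeps the operator schema's argument order with `out` last; both target the same kernel. A hedged sketch (the wrapper function is ours):

void out_variants_demo(const at::Tensor& self, at::Tensor& out) {
  // The two calls below are equivalent; only the argument order differs.
  at::compositeexplicitautograd::_adaptive_avg_pool2d_out(out, self, {7, 7});
  at::compositeexplicitautograd::_adaptive_avg_pool2d_outf(self, {7, 7}, out);
}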
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h
ADDED
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _cast_Short(const at::Tensor & self, bool non_blocking=false);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h
ADDED
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _debug_has_internal_overlap {
+  using schema = int64_t (const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_debug_has_internal_overlap")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_debug_has_internal_overlap(Tensor self) -> int")
+  static int64_t call(const at::Tensor & self);
+  static int64_t redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
+};
+
+}} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _fake_quantize_learnable_per_tensor_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor=1.0);
+
+} // namespace cuda
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h
ADDED
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _flash_attention_backward(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, c10::optional<double> scale=c10::nullopt);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API void _foreach_acos_out(at::TensorList self, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_acos_slow(at::TensorList self);
+TORCH_API void foreach_tensor_acos_slow_(at::TensorList self);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_acos_cuda(at::TensorList self);
+TORCH_API void foreach_tensor_acos_cuda_(at::TensorList self);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h
ADDED
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__linalg_slogdet : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & A);
+};
+
+} // namespace meta
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _linalg_svd(const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional<c10::string_view> driver=c10::nullopt);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _linalg_svd_out(at::Tensor & U, at::Tensor & S, at::Tensor & Vh, const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional<c10::string_view> driver=c10::nullopt);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _linalg_svd_outf(const at::Tensor & A, bool full_matrices, bool compute_uv, c10::optional<c10::string_view> driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh);
+
+} // namespace cpu
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h
ADDED
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_logcumsumexp_ops.h>
+
+namespace at {
+
+
+// aten::_logcumsumexp(Tensor self, int dim) -> Tensor
+inline at::Tensor _logcumsumexp(const at::Tensor & self, int64_t dim) {
+    return at::_ops::_logcumsumexp::call(self, dim);
+}
+
+// aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _logcumsumexp_out(at::Tensor & out, const at::Tensor & self, int64_t dim) {
+    return at::_ops::_logcumsumexp_out::call(self, dim, out);
+}
+// aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _logcumsumexp_outf(const at::Tensor & self, int64_t dim, at::Tensor & out) {
+    return at::_ops::_logcumsumexp_out::call(self, dim, out);
+}
+
+}
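Both wrappers above forward to the same at::_ops::_logcumsumexp_out::call; they differ only in where the out argument sits. A minimal sketch of the two call styles, assuming a libtorch build (underscore ops are internal and may change between releases):

#include <ATen/ATen.h>

int main() {
  at::Tensor x = at::randn({5});
  // Functional form: allocates and returns a fresh result tensor.
  at::Tensor y = at::_logcumsumexp(x, /*dim=*/0);
  // Out form: writes into a preallocated tensor instead.
  at::Tensor out = at::empty_like(x);
  at::_logcumsumexp_out(out, x, /*dim=*/0);
  return 0;
}
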
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h
ADDED
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_native_multi_head_attention_ops.h>
+
+namespace at {
+
+
+// aten::_native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) {
+    return at::_ops::_native_multi_head_attention::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type);
+}
+
+// aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> _native_multi_head_attention_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) {
+    return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1);
+}
+// aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> _native_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type, at::Tensor & out0, at::Tensor & out1) {
+    return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1);
+}
+
+}
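A sketch of calling this op for self-attention, assuming a libtorch build. The weight shapes (qkv_weight as [3*E, E], proj_weight as [E, E]) are assumptions inferred from the fused-projection schema, and the underscore prefix marks a private op whose signature may change:

#include <ATen/ATen.h>

int main() {
  const int64_t B = 2, S = 4, E = 8, H = 2;  // batch, seq len, embed dim, heads
  at::Tensor q = at::randn({B, S, E});
  at::Tensor qkv_w = at::randn({3 * E, E});  // fused in-projection weight (assumed shape)
  at::Tensor qkv_b = at::randn({3 * E});
  at::Tensor proj_w = at::randn({E, E});     // out-projection weight (assumed shape)
  at::Tensor proj_b = at::randn({E});
  // Self-attention: query, key and value all alias q.
  auto [out, weights] = at::_native_multi_head_attention(
      q, q, q, E, H, qkv_w, qkv_b, proj_w, proj_b);
  return 0;
}
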
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _nested_tensor_storage_offsets_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor _nested_tensor_storage_offsets(const at::Tensor & self);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor log_softmax_backward_sparse_cpu(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self);
+TORCH_API at::Tensor log_softmax_backward_sparse_cuda(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h
ADDED
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _to_sparse_sparse_dim_out(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out);
+TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor & _to_sparse_out(const at::Tensor & self, c10::optional<at::Layout> layout, at::OptionalIntArrayRef blocksize, c10::optional<int64_t> dense_dim, at::Tensor & out);
+TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
+TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
+TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
+} // namespace native
+} // namespace at
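These per-layout kernels (dense_to_sparse, sparse_coo_to_sparse, sparse_compressed_to_sparse) sit behind the public Tensor::to_sparse conversion methods. A minimal sketch, assuming a libtorch build:

#include <ATen/ATen.h>

int main() {
  at::Tensor dense = at::eye(3);           // mostly-zero dense matrix
  at::Tensor coo = dense.to_sparse();      // routes to dense_to_sparse (COO layout)
  at::Tensor csr = dense.to_sparse_csr();  // compressed-row layout instead
  TORCH_CHECK(coo.is_sparse());
  TORCH_CHECK(csr.layout() == at::kSparseCsr);
  return 0;
}
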
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h
ADDED
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact2d_backward : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, c10::optional<double> scales_h, c10::optional<double> scales_w);
+};
+
+} // namespace meta
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h
ADDED
@@ -0,0 +1,30 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_validate_sparse_csr_tensor_args_ops.h>
+
+namespace at {
+
+
+// aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
+inline void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size) {
+    return at::_ops::_validate_sparse_csr_tensor_args::call(crow_indices, col_indices, values, size);
+}
+
+}
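A sketch of validating hand-built CSR components, assuming a libtorch build; the op returns nothing and throws a c10::Error when the components are inconsistent with the requested size:

#include <ATen/ATen.h>

int main() {
  // CSR components for a 2x2 matrix with one nonzero per row.
  at::Tensor crow = at::arange(3, at::kLong);  // row pointers: 0, 1, 2
  at::Tensor col  = at::arange(2, at::kLong);  // column indices: 0, 1
  at::Tensor vals = at::ones({2});             // the two nonzero values
  at::_validate_sparse_csr_tensor_args(crow, col, vals, {2, 2});
  return 0;
}
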
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h
ADDED
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API align_as {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::align_as")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "align_as(Tensor self, Tensor other) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & other);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other);
+};
+
+}} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h
ADDED
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured_aminmax : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & self, c10::optional<int64_t> dim, bool keepdim);
+};
+
+} // namespace meta
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h
ADDED
@@ -0,0 +1,50 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API ceil {
+  using schema = at::Tensor (const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil(Tensor self) -> Tensor")
+  static at::Tensor call(const at::Tensor & self);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
+};
+
+struct TORCH_API ceil_ {
+  using schema = at::Tensor & (at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil_(Tensor(a!) self) -> Tensor(a!)")
+  static at::Tensor & call(at::Tensor & self);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self);
+};
+
+struct TORCH_API ceil_out {
+  using schema = at::Tensor & (const at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out);
+};
+
+}} // namespace at::_ops
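These Operator.h structs are the lowest layer of the generated API: one struct per overload, each carrying its schema string plus call (full dispatch) and redispatch (continue from a given key set) entry points. A minimal sketch showing that the familiar at::ceil wrapper and the raw struct reach the same kernel, assuming a libtorch build:

#include <ATen/ATen.h>
#include <ATen/ops/ceil_ops.h>

int main() {
  at::Tensor t = at::randn({4});
  at::Tensor a = at::ceil(t);              // the usual entry point
  at::Tensor b = at::_ops::ceil::call(t);  // same op through the generated struct
  TORCH_CHECK(at::equal(a, b));
  return 0;
}
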
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h
ADDED
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0);
+TORCH_API at::Tensor constant_pad_nd_symint(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0);
+TORCH_API at::Tensor & constant_pad_nd_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0);
+TORCH_API at::Tensor & constant_pad_nd_outf(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value, at::Tensor & out);
+TORCH_API at::Tensor & constant_pad_nd_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0);
+TORCH_API at::Tensor & constant_pad_nd_symint_outf(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor dot(const at::Tensor & self, const at::Tensor & tensor);
+
+} // namespace cuda
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h
ADDED
@@ -0,0 +1,91 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/fft_hfft_ops.h>
+
+namespace at {
+
+
+// aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
+inline at::Tensor fft_hfft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor fft_hfft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
+  }
+}
+
+// aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
+inline at::Tensor fft_hfft_symint(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft::call(self, n, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor fft_hfft(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft::call(self, n, dim, norm);
+  }
+}
+
+// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+  }
+}
+
+// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+  }
+}
+
+// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_hfft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
+  }
+}
+
+// aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_hfft_symint_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
+  }
+}
+
+}
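The duplication above exists because the schema takes SymInt? n: the plain overload accepts a concrete int64_t length and wraps it in a SymInt, while the _symint overload passes a possibly symbolic length straight through. A sketch of both paths, assuming a libtorch build:

#include <ATen/ATen.h>

int main() {
  // hfft treats `self` as one side of a Hermitian-symmetric spectrum.
  at::Tensor x = at::randn({9}, at::kComplexFloat);
  // Concrete-length overload: pad/crop to an output length of 16.
  at::Tensor y = at::fft_hfft(x, 16);
  // SymInt overload: identical here, but lets a compiled graph keep the
  // length symbolic instead of baking in the constant.
  at::Tensor z = at::fft_hfft_symint(x, c10::SymInt(16));
  TORCH_CHECK(at::allclose(y, z));
  return 0;
}
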
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor fft_irfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor & fft_irfft2_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor fft_rfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor & fft_rfftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out);
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h
ADDED
@@ -0,0 +1,44 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/fix_ops.h>
+
+namespace at {
+
+
+// aten::fix(Tensor self) -> Tensor
+inline at::Tensor fix(const at::Tensor & self) {
+    return at::_ops::fix::call(self);
+}
+
+// aten::fix_(Tensor(a!) self) -> Tensor(a!)
+inline at::Tensor & fix_(at::Tensor & self) {
+    return at::_ops::fix_::call(self);
+}
+
+// aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fix_out(at::Tensor & out, const at::Tensor & self) {
+    return at::_ops::fix_out::call(self, out);
+}
+// aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fix_outf(const at::Tensor & self, at::Tensor & out) {
+    return at::_ops::fix_out::call(self, out);
+}
+
+}
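fix is an alias for trunc (round toward zero), and the header exposes the standard trio of variants. A minimal sketch exercising all three, assuming a libtorch build:

#include <ATen/ATen.h>

int main() {
  at::Tensor t = at::randn({4}) * 3;
  at::Tensor a = at::fix(t);          // functional form: returns a new tensor
  at::Tensor out = at::empty_like(t);
  at::fix_out(out, t);                // out form: writes into `out`
  at::fix_(t);                        // in-place form: mutates `t`
  TORCH_CHECK(at::equal(a, t));
  return 0;
}
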
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h
ADDED
@@ -0,0 +1,33 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out);
+TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Tensor & exponent);
+TORCH_API at::Tensor float_power(const at::Scalar & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Scalar & self, const at::Tensor & exponent, at::Tensor & out);
+TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Scalar & exponent);
+TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & exponent);
+TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out);
+TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Scalar & exponent);
+
+} // namespace compositeimplicitautograd
+} // namespace at
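float_power differs from at::pow in that it always computes in double (or complex double) precision regardless of the input dtype, which is why it lives in the composite-implicit-autograd namespace rather than having per-backend kernels. A minimal sketch, assuming a libtorch build:

#include <ATen/ATen.h>

int main() {
  at::Tensor t = at::rand({3});            // float32 input
  at::Tensor p = at::float_power(t, 2);    // result is promoted to float64
  TORCH_CHECK(p.scalar_type() == at::kDouble);
  return 0;
}
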
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor & geometric_(at::Tensor & self, double p, c10::optional<at::Generator> generator=c10::nullopt);
+
+} // namespace cpu
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h
ADDED
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API glu_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
+};
+
+struct TORCH_API glu_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
+};
+
+}} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/hardshrink_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured_hardshrink_out : public at::meta::structured_hardshrink {
+  void impl(const at::Tensor & self, const at::Scalar & lambd, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor hardtanh_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
+TORCH_API at::Tensor & hardtanh_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
+TORCH_API at::Tensor & hardtanh_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor isposinf(const at::Tensor & self);
+TORCH_API at::Tensor & isposinf_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & isposinf_outf(const at::Tensor & self, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h
ADDED
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor le(const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & le_(at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor le(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & le_(at::Tensor & self, const at::Tensor & other);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h
ADDED
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> linalg_qr(const at::Tensor & A, c10::string_view mode="reduced");
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> linalg_svd(const at::Tensor & A, bool full_matrices=true, c10::optional<c10::string_view> driver=c10::nullopt);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> linalg_svd_out(const at::Tensor & A, bool full_matrices, c10::optional<c10::string_view> driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh);
+} // namespace native
+} // namespace at
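A minimal sketch of the public at::linalg_svd entry point these declarations back, assuming a libtorch build; the reconstruction check uses (U * S) @ Vh, which equals U @ diag(S) @ Vh via broadcasting:

#include <ATen/ATen.h>

int main() {
  at::Tensor A = at::randn({4, 3});
  // Reduced SVD: U is 4x3, S is 3, Vh is 3x3.
  auto [U, S, Vh] = at::linalg_svd(A, /*full_matrices=*/false);
  at::Tensor recon = at::matmul(U * S, Vh);
  TORCH_CHECK(at::allclose(recon, A, /*rtol=*/1e-4, /*atol=*/1e-5));
  return 0;
}
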
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h
ADDED
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured_linalg_vector_norm : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype);
+};
+
+} // namespace meta
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor logaddexp2(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & logaddexp2_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & logaddexp2_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h
ADDED
@@ -0,0 +1,30 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/max_pool1d_with_indices_ops.h>
+
+namespace at {
+
+
+// aten::max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> max_pool1d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) {
+    return at::_ops::max_pool1d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode);
+}
+
+}
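A minimal sketch of this op, assuming a libtorch build; when stride is left empty it defaults to the kernel size, so a kernel of 2 halves the length dimension:

#include <ATen/ATen.h>

int main() {
  // Input is (N, C, L): one batch, one channel, eight time steps.
  at::Tensor x = at::randn({1, 1, 8});
  auto [values, indices] = at::max_pool1d_with_indices(x, /*kernel_size=*/{2});
  TORCH_CHECK(values.sizes() == indices.sizes());  // both (1, 1, 4)
  return 0;
}
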
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once

// @generated by torchgen/gen.py from Operator.h

#include <tuple>
#include <vector>

// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>

namespace at {
namespace _ops {


struct TORCH_API max_pool2d_with_indices_out {
  using schema = ::std::tuple<at::Tensor &,at::Tensor &> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool, at::Tensor &, at::Tensor &);
  using ptr_schema = schema*;
  // See Note [static constexpr char* members for windows NVCC]
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))")
  static ::std::tuple<at::Tensor &,at::Tensor &> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
  static ::std::tuple<at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
};

struct TORCH_API max_pool2d_with_indices {
  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool);
  using ptr_schema = schema*;
  // See Note [static constexpr char* members for windows NVCC]
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)")
  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
};

}} // namespace at::_ops
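A hedged sketch of how these operator structs are used (assumed example, not from the upload): `call()` is the entry point the generated inline wrapper forwards to, mirroring the max_pool1d Function.h shown earlier. Unlike the wrapper, `call()` carries no default arguments, so every schema argument must be spelled out.

#include <ATen/ATen.h>
#include <ATen/ops/max_pool2d_with_indices_ops.h>

int main() {
  at::Tensor img = at::randn({1, 1, 4, 4});
  // Direct invocation of the operator struct declared above.
  auto [out, idx] = at::_ops::max_pool2d_with_indices::call(
      img, /*kernel_size=*/{2, 2}, /*stride=*/{}, /*padding=*/{0, 0},
      /*dilation=*/{1, 1}, /*ceil_mode=*/false);
  // Both results are (1, 1, 2, 2): pooled values and argmax indices.
  return 0;
}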
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h
ADDED
@@ -0,0 +1,28 @@
#pragma once

// @generated by torchgen/gen.py from Operator.h

#include <tuple>
#include <vector>

// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>

namespace at {
namespace _ops {


struct TORCH_API miopen_convolution_relu {
  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt);
  using ptr_schema = schema*;
  // See Note [static constexpr char* members for windows NVCC]
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::miopen_convolution_relu")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor")
  static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
};

}} // namespace at::_ops
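A hedged sketch, not part of the upload: this op fuses convolution and ReLU through MIOpen, so it only dispatches on ROCm builds where the tensors live on a HIP ("cuda") device. The shapes and the helper name `fused_conv_relu` below are illustrative assumptions; the public `at::miopen_convolution_relu` wrapper takes plain `IntArrayRef`/`int64_t` in place of the SymInt schema types.

#include <ATen/ATen.h>

// Fused conv + ReLU in one kernel launch; valid only where the MIOpen
// backend is registered (ROCm builds, HIP tensors).
at::Tensor fused_conv_relu(const at::Tensor& input, const at::Tensor& weight) {
  return at::miopen_convolution_relu(
      input, weight, /*bias=*/{}, /*stride=*/{1, 1},
      /*padding=*/{1, 1}, /*dilation=*/{1, 1}, /*groups=*/1);
}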
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h
ADDED
@@ -0,0 +1,23 @@
#pragma once
// @generated by torchgen/gen.py from DispatchKeyFunction.h

// NB: The implementing C++ file is RegisterDispatchKey.cpp

// The only #includes we need are for custom classes that have defaults in the C++ API
#include <c10/core/MemoryFormat.h>
#include <c10/core/Scalar.h>
#include <ATen/core/Reduction.h>

// Forward declarations of any types needed in the operator signatures.
// We can't directly include these classes because it will cause circular include dependencies.
// This file is included by TensorBody.h, which defines the Tensor class.
#include <ATen/core/ATen_fwd.h>

namespace at {

namespace cpu {

TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> mkldnn_rnn_layer_backward(const at::Tensor & input, const at::Tensor & weight1, const at::Tensor & weight2, const at::Tensor & weight3, const at::Tensor & weight4, const at::Tensor & hx_, const at::Tensor & cx_tmp, const at::Tensor & output, const at::Tensor & hy_, const at::Tensor & cy_, const c10::optional<at::Tensor> & grad_output, const c10::optional<at::Tensor> & grad_hy, const c10::optional<at::Tensor> & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes, bool batch_first, const at::Tensor & workspace);

} // namespace cpu
} // namespace at
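A hedged note with an assumed example: functions in the `at::cpu` namespace bind directly to the CPU kernel, bypassing dispatch. Since exercising this 23-argument backward op needs real MKL-DNN workspaces, the sketch below only queries the dispatcher for the op's registered schema instead of running it.

#include <ATen/core/dispatch/Dispatcher.h>
#include <iostream>

int main() {
  // Read-only lookup: resolves the operator by name and (empty) overload
  // and prints its registered schema; no kernel is executed.
  auto op = c10::Dispatcher::singleton()
                .findSchemaOrThrow("aten::mkldnn_rnn_layer_backward", "");
  std::cout << op.schema() << "\n";
  return 0;
}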