koichi12 commited on
Commit
0b443f9
·
verified ·
1 Parent(s): 25bcc66

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_cuda_dispatch.h +24 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_affine_quantized_native.h +23 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cpu_dispatch.h +24 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cuda_dispatch.h +24 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_frac_ops.h +50 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_mul_native.h +40 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_trunc_compositeexplicitautograd_dispatch.h +24 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_zero_compositeexplicitautograd_dispatch.h +25 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mkldnn_reshape.h +39 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_get_values_native.h +20 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_print_compositeexplicitautograd_dispatch.h +23 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h +28 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h +24 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h +23 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h +24 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h +24 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unsafe_index_put.h +30 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h +27 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h +24 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_version.h +26 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h +21 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/add_meta.h +27 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin_native.h +23 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/asin_cpu_dispatch.h +26 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan_native.h +29 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt.h +39 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_elemt.h +39 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_stats_native.h +22 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/binary_cross_entropy_backward_ops.h +39 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bmm_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/channel_shuffle.h +91 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/col2im_native.h +24 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/crow_indices.h +26 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose.h +91 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dist_native.h +22 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/embedding_native.h +23 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/erfc_meta_dispatch.h +26 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_fft.h +91 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward.h +39 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gather_cuda_dispatch.h +25 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge.h +53 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge_cpu_dispatch.h +30 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hsplit_ops.h +39 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/indices.h +26 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/item_native.h +21 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kl_div.h +30 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kthvalue_cpu_dispatch.h +24 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h +26 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_cuda_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, at::TensorOptions options);
21
+ TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory);
22
+
23
+ } // namespace cuda
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_affine_quantized_native.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor & _empty_affine_quantized_out_symint(c10::SymIntArrayRef size, double scale, int64_t zero_point, c10::optional<at::MemoryFormat> memory_format, at::Tensor & out);
20
+ TORCH_API at::Tensor empty_affine_quantized_other_backends_stub(at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}, double scale=1, int64_t zero_point=0, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
21
+ TORCH_API at::Tensor empty_affine_quantized(at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}, double scale=1, int64_t zero_point=0, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
22
+ } // namespace native
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cpu_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API ::std::vector<at::Tensor> _foreach_expm1(at::TensorList self);
21
+ TORCH_API void _foreach_expm1_(at::TensorList self);
22
+
23
+ } // namespace cpu
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cuda_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API ::std::vector<at::Tensor> _foreach_expm1(at::TensorList self);
21
+ TORCH_API void _foreach_expm1_(at::TensorList self);
22
+
23
+ } // namespace cuda
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_frac_ops.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API _foreach_frac {
18
+ using schema = ::std::vector<at::Tensor> (at::TensorList);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac(Tensor[] self) -> Tensor[]")
24
+ static ::std::vector<at::Tensor> call(at::TensorList self);
25
+ static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self);
26
+ };
27
+
28
+ struct TORCH_API _foreach_frac_ {
29
+ using schema = void (at::TensorList);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac_")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac_(Tensor(a!)[] self) -> ()")
35
+ static void call(at::TensorList self);
36
+ static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self);
37
+ };
38
+
39
+ struct TORCH_API _foreach_frac_out {
40
+ using schema = void (at::TensorList, at::TensorList);
41
+ using ptr_schema = schema*;
42
+ // See Note [static constexpr char* members for windows NVCC]
43
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac")
44
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
45
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac.out(Tensor[] self, *, Tensor(a!)[] out) -> ()")
46
+ static void call(at::TensorList self, at::TensorList out);
47
+ static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out);
48
+ };
49
+
50
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_mul_native.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API void _foreach_mul_Scalar_out(at::TensorList self, const at::Scalar & scalar, at::TensorList out);
20
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_slow(at::TensorList self, const at::Scalar & scalar);
21
+ TORCH_API void foreach_tensor_mul_scalar_kernel_slow_(at::TensorList self, const at::Scalar & scalar);
22
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_cuda(at::TensorList self, const at::Scalar & scalar);
23
+ TORCH_API void foreach_tensor_mul_scalar_kernel_cuda_(at::TensorList self, const at::Scalar & scalar);
24
+ TORCH_API void _foreach_mul_List_out(at::TensorList self, at::TensorList other, at::TensorList out);
25
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_list_kernel_slow(at::TensorList self, at::TensorList other);
26
+ TORCH_API void foreach_tensor_mul_list_kernel_slow_(at::TensorList self, at::TensorList other);
27
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_list_kernel_cuda(at::TensorList self, at::TensorList other);
28
+ TORCH_API void foreach_tensor_mul_list_kernel_cuda_(at::TensorList self, at::TensorList other);
29
+ TORCH_API void _foreach_mul_ScalarList_out(at::TensorList self, at::ArrayRef<at::Scalar> scalars, at::TensorList out);
30
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalarlist_kernel_slow(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
31
+ TORCH_API void foreach_tensor_mul_scalarlist_kernel_slow_(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
32
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalarlist_kernel_cuda(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
33
+ TORCH_API void foreach_tensor_mul_scalarlist_kernel_cuda_(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
34
+ TORCH_API void _foreach_mul_Tensor_out(at::TensorList self, const at::Tensor & other, at::TensorList out);
35
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_tensor_kernel_slow(at::TensorList self, const at::Tensor & other);
36
+ TORCH_API void foreach_tensor_mul_tensor_kernel_slow_(at::TensorList self, const at::Tensor & other);
37
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_tensor_kernel_cuda(at::TensorList self, const at::Tensor & other);
38
+ TORCH_API void foreach_tensor_mul_tensor_kernel_cuda_(at::TensorList self, const at::Tensor & other);
39
+ } // namespace native
40
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_trunc_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API void _foreach_trunc_out(at::TensorList out, at::TensorList self);
21
+ TORCH_API void _foreach_trunc_outf(at::TensorList self, at::TensorList out);
22
+
23
+ } // namespace compositeexplicitautograd
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_zero_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API ::std::vector<at::Tensor> _foreach_zero(at::TensorList self);
21
+ TORCH_API void _foreach_zero_out(at::TensorList out, at::TensorList self);
22
+ TORCH_API void _foreach_zero_outf(at::TensorList self, at::TensorList out);
23
+
24
+ } // namespace compositeexplicitautograd
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mkldnn_reshape.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_mkldnn_reshape_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::_mkldnn_reshape(Tensor self, int[] shape) -> Tensor
26
+ inline at::Tensor _mkldnn_reshape(const at::Tensor & self, at::IntArrayRef shape) {
27
+ return at::_ops::_mkldnn_reshape::call(self, shape);
28
+ }
29
+
30
+ // aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)
31
+ inline at::Tensor & _mkldnn_reshape_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef shape) {
32
+ return at::_ops::_mkldnn_reshape_out::call(self, shape, out);
33
+ }
34
+ // aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)
35
+ inline at::Tensor & _mkldnn_reshape_outf(const at::Tensor & self, at::IntArrayRef shape, at::Tensor & out) {
36
+ return at::_ops::_mkldnn_reshape_out::call(self, shape, out);
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_get_values_native.h ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ } // namespace native
20
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_print_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API void _print(c10::string_view s);
21
+
22
+ } // namespace compositeexplicitautograd
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API _reshape_from_tensor {
18
+ using schema = at::Tensor (const at::Tensor &, const at::Tensor &);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_reshape_from_tensor")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor")
24
+ static at::Tensor call(const at::Tensor & self, const at::Tensor & shape);
25
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & shape);
26
+ };
27
+
28
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeimplicitautograd {
19
+
20
+ TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=1, int64_t b=1);
21
+ TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a, c10::string_view b);
22
+
23
+ } // namespace compositeimplicitautograd
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeimplicitautograd {
19
+
20
+ TORCH_API at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b);
21
+
22
+ } // namespace compositeimplicitautograd
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API at::Tensor & _test_functorch_fallback_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
21
+ TORCH_API at::Tensor & _test_functorch_fallback_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
22
+
23
+ } // namespace compositeexplicitautograd
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API at::Tensor & _to_sparse_bsr_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim=c10::nullopt);
21
+ TORCH_API at::Tensor & _to_sparse_bsr_outf(const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim, at::Tensor & out);
22
+
23
+ } // namespace compositeexplicitautograd
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unsafe_index_put.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_unsafe_index_put_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
26
+ inline at::Tensor _unsafe_index_put(const at::Tensor & self, const c10::List<c10::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false) {
27
+ return at::_ops::_unsafe_index_put::call(self, indices, values, accumulate);
28
+ }
29
+
30
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+ #include <ATen/ops/_upsample_nearest_exact1d_meta.h>
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
20
+ struct TORCH_API structured__upsample_nearest_exact1d_out_cpu : public at::meta::structured__upsample_nearest_exact1d {
21
+ void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales, const at::Tensor & out);
22
+ };
23
+ struct TORCH_API structured__upsample_nearest_exact1d_out_cuda : public at::meta::structured__upsample_nearest_exact1d {
24
+ void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales, const at::Tensor & out);
25
+ };
26
+ } // namespace native
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeimplicitautograd {
19
+
20
+ TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
21
+ TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
22
+
23
+ } // namespace compositeimplicitautograd
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_version.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_version_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+
26
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor> _weight_norm_differentiable_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim);
20
+ } // namespace native
21
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/add_meta.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeMetaFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/TensorIterator.h>
13
+ #include <ATen/TensorMeta.h>
14
+ #include <tuple>
15
+ #include <vector>
16
+
17
+ namespace at {
18
+ namespace meta {
19
+
20
+ struct TORCH_API structured_add_Tensor : public TensorIteratorBase {
21
+
22
+
23
+ void meta(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha);
24
+ };
25
+
26
+ } // namespace native
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin_native.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor arcsin(const at::Tensor & self);
20
+ TORCH_API at::Tensor & arcsin_out(const at::Tensor & self, at::Tensor & out);
21
+ TORCH_API at::Tensor & arcsin_(at::Tensor & self);
22
+ } // namespace native
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/asin_cpu_dispatch.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API at::Tensor asin(const at::Tensor & self);
21
+ TORCH_API at::Tensor & asin_out(at::Tensor & out, const at::Tensor & self);
22
+ TORCH_API at::Tensor & asin_outf(const at::Tensor & self, at::Tensor & out);
23
+ TORCH_API at::Tensor & asin_(at::Tensor & self);
24
+
25
+ } // namespace cpu
26
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan_native.h ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+ #include <ATen/ops/atan_meta.h>
16
+
17
+ namespace at {
18
+ namespace native {
19
+ struct TORCH_API structured_atan_out : public at::meta::structured_atan {
20
+ void impl(const at::Tensor & self, const at::Tensor & out);
21
+ };
22
+ TORCH_API at::Tensor atan_sparse(const at::Tensor & self);
23
+ TORCH_API at::Tensor & atan_sparse_out(const at::Tensor & self, at::Tensor & out);
24
+ TORCH_API at::Tensor & atan_sparse_(at::Tensor & self);
25
+ TORCH_API at::Tensor atan_sparse_csr(const at::Tensor & self);
26
+ TORCH_API at::Tensor & atan_sparse_csr_out(const at::Tensor & self, at::Tensor & out);
27
+ TORCH_API at::Tensor & atan_sparse_csr_(at::Tensor & self);
28
+ } // namespace native
29
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/batch_norm_backward_elemt_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor
26
+ inline at::Tensor batch_norm_backward_elemt(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count) {
27
+ return at::_ops::batch_norm_backward_elemt::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count);
28
+ }
29
+
30
+ // aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) out) -> Tensor(a!)
31
+ inline at::Tensor & batch_norm_backward_elemt_out(at::Tensor & out, const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count) {
32
+ return at::_ops::batch_norm_backward_elemt_out::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count, out);
33
+ }
34
+ // aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) out) -> Tensor(a!)
35
+ inline at::Tensor & batch_norm_backward_elemt_outf(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count, at::Tensor & out) {
36
+ return at::_ops::batch_norm_backward_elemt_out::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count, out);
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_elemt.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/batch_norm_elemt_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor
26
+ inline at::Tensor batch_norm_elemt(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps) {
27
+ return at::_ops::batch_norm_elemt::call(input, weight, bias, mean, invstd, eps);
28
+ }
29
+
30
+ // aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)
31
+ inline at::Tensor & batch_norm_elemt_out(at::Tensor & out, const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps) {
32
+ return at::_ops::batch_norm_elemt_out::call(input, weight, bias, mean, invstd, eps, out);
33
+ }
34
+ // aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)
35
+ inline at::Tensor & batch_norm_elemt_outf(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps, at::Tensor & out) {
36
+ return at::_ops::batch_norm_elemt_out::call(input, weight, bias, mean, invstd, eps, out);
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_stats_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> batch_norm_stats_out(const at::Tensor & input, double eps, at::Tensor & out0, at::Tensor & out1);
20
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor> batch_norm_stats_cuda(const at::Tensor & input, double eps);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/binary_cross_entropy_backward_ops.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API binary_cross_entropy_backward {
18
+ using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, int64_t);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::binary_cross_entropy_backward")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor")
24
+ static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction);
25
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction);
26
+ };
27
+
28
+ struct TORCH_API binary_cross_entropy_backward_grad_input {
29
+ using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, int64_t, at::Tensor &);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::binary_cross_entropy_backward")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)")
35
+ static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction, at::Tensor & grad_input);
36
+ static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction, at::Tensor & grad_input);
37
+ };
38
+
39
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bmm_compositeexplicitautogradnonfunctional_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautogradnonfunctional {
19
+
20
+ TORCH_API at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2);
21
+
22
+ } // namespace compositeexplicitautogradnonfunctional
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/channel_shuffle.h ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/channel_shuffle_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor
26
+ inline at::Tensor channel_shuffle(const at::Tensor & self, int64_t groups) {
27
+ return at::_ops::channel_shuffle::call(self, groups);
28
+ }
29
+ namespace symint {
30
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
31
+ at::Tensor channel_shuffle(const at::Tensor & self, int64_t groups) {
32
+ return at::_ops::channel_shuffle::call(self, groups);
33
+ }
34
+ }
35
+
36
+ // aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor
37
+ inline at::Tensor channel_shuffle_symint(const at::Tensor & self, c10::SymInt groups) {
38
+ return at::_ops::channel_shuffle::call(self, groups);
39
+ }
40
+ namespace symint {
41
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
42
+ at::Tensor channel_shuffle(const at::Tensor & self, c10::SymInt groups) {
43
+ return at::_ops::channel_shuffle::call(self, groups);
44
+ }
45
+ }
46
+
47
+ // aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
48
+ inline at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, int64_t groups) {
49
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
50
+ }
51
+ namespace symint {
52
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
53
+ at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, int64_t groups) {
54
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
55
+ }
56
+ }
57
+
58
+ // aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
59
+ inline at::Tensor & channel_shuffle_outf(const at::Tensor & self, int64_t groups, at::Tensor & out) {
60
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
61
+ }
62
+ namespace symint {
63
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
64
+ at::Tensor & channel_shuffle_outf(const at::Tensor & self, int64_t groups, at::Tensor & out) {
65
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
66
+ }
67
+ }
68
+
69
+ // aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
70
+ inline at::Tensor & channel_shuffle_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymInt groups) {
71
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
72
+ }
73
+ namespace symint {
74
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
75
+ at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, c10::SymInt groups) {
76
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
77
+ }
78
+ }
79
+
80
+ // aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
81
+ inline at::Tensor & channel_shuffle_symint_outf(const at::Tensor & self, c10::SymInt groups, at::Tensor & out) {
82
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
83
+ }
84
+ namespace symint {
85
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
86
+ at::Tensor & channel_shuffle_outf(const at::Tensor & self, c10::SymInt groups, at::Tensor & out) {
87
+ return at::_ops::channel_shuffle_out::call(self, groups, out);
88
+ }
89
+ }
90
+
91
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/col2im_native.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor col2im_cpu(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride);
20
+ TORCH_API at::Tensor & col2im_out_cpu(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out);
21
+ TORCH_API at::Tensor col2im_cuda(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride);
22
+ TORCH_API at::Tensor & col2im_out_cuda(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out);
23
+ } // namespace native
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/crow_indices.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/crow_indices_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+
26
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose.h ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/cudnn_convolution_transpose_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
26
+ inline at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
27
+ return at::_ops::cudnn_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32);
28
+ }
29
+ namespace symint {
30
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
31
+ at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
32
+ return at::_ops::cudnn_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32);
33
+ }
34
+ }
35
+
36
+ // aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
37
+ inline at::Tensor cudnn_convolution_transpose_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
38
+ return at::_ops::cudnn_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
39
+ }
40
+ namespace symint {
41
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
42
+ at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
43
+ return at::_ops::cudnn_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
44
+ }
45
+ }
46
+
47
+ // aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
48
+ inline at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
49
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
50
+ }
51
+ namespace symint {
52
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
53
+ at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
54
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
55
+ }
56
+ }
57
+
58
+ // aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
59
+ inline at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
60
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
61
+ }
62
+ namespace symint {
63
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
64
+ at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
65
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
66
+ }
67
+ }
68
+
69
+ // aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
70
+ inline at::Tensor & cudnn_convolution_transpose_symint_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
71
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
72
+ }
73
+ namespace symint {
74
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
75
+ at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
76
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
77
+ }
78
+ }
79
+
80
+ // aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
81
+ inline at::Tensor & cudnn_convolution_transpose_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
82
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
83
+ }
84
+ namespace symint {
85
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
86
+ at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
87
+ return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
88
+ }
89
+ }
90
+
91
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dist_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor dist(const at::Tensor & self, const at::Tensor & other, const at::Scalar & p=2);
20
+ TORCH_API at::Tensor & dist_out(const at::Tensor & self, const at::Tensor & other, const at::Scalar & p, at::Tensor & out);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/embedding_native.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor embedding_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx=-1, bool scale_grad_by_freq=false, bool sparse=false);
20
+ TORCH_API at::Tensor & embedding_out_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse, at::Tensor & out);
21
+ TORCH_API at::Tensor NestedTensor_embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx=-1, bool scale_grad_by_freq=false, bool sparse=false);
22
+ } // namespace native
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/erfc_meta_dispatch.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace meta {
19
+
20
+ TORCH_API at::Tensor erfc(const at::Tensor & self);
21
+ TORCH_API at::Tensor & erfc_out(at::Tensor & out, const at::Tensor & self);
22
+ TORCH_API at::Tensor & erfc_outf(const at::Tensor & self, at::Tensor & out);
23
+ TORCH_API at::Tensor & erfc_(at::Tensor & self);
24
+
25
+ } // namespace meta
26
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_fft.h ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/fft_fft_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
26
+ inline at::Tensor fft_fft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
27
+ return at::_ops::fft_fft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
28
+ }
29
+ namespace symint {
30
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
31
+ at::Tensor fft_fft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
32
+ return at::_ops::fft_fft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
33
+ }
34
+ }
35
+
36
+ // aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
37
+ inline at::Tensor fft_fft_symint(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
38
+ return at::_ops::fft_fft::call(self, n, dim, norm);
39
+ }
40
+ namespace symint {
41
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
42
+ at::Tensor fft_fft(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
43
+ return at::_ops::fft_fft::call(self, n, dim, norm);
44
+ }
45
+ }
46
+
47
+ // aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
48
+ inline at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
49
+ return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
50
+ }
51
+ namespace symint {
52
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
53
+ at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
54
+ return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
55
+ }
56
+ }
57
+
58
+ // aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
59
+ inline at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
60
+ return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
61
+ }
62
+ namespace symint {
63
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
64
+ at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
65
+ return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
66
+ }
67
+ }
68
+
69
+ // aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
70
+ inline at::Tensor & fft_fft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
71
+ return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
72
+ }
73
+ namespace symint {
74
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
75
+ at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
76
+ return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
77
+ }
78
+ }
79
+
80
+ // aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
81
+ inline at::Tensor & fft_fft_symint_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
82
+ return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
83
+ }
84
+ namespace symint {
85
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
86
+ at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
87
+ return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
88
+ }
89
+ }
90
+
91
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/fractional_max_pool2d_backward_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
26
+ inline at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) {
27
+ return at::_ops::fractional_max_pool2d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input);
28
+ }
29
+ // aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
30
+ inline at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input) {
31
+ return at::_ops::fractional_max_pool2d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input);
32
+ }
33
+
34
+ // aten::fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
35
+ inline at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) {
36
+ return at::_ops::fractional_max_pool2d_backward::call(grad_output, self, kernel_size, output_size, indices);
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gather_cuda_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor gather(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad=false);
21
+ TORCH_API at::Tensor & gather_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad=false);
22
+ TORCH_API at::Tensor & gather_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad, at::Tensor & out);
23
+
24
+ } // namespace cuda
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge.h ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/ge_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
26
+ inline at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) {
27
+ return at::_ops::ge_Scalar_out::call(self, other, out);
28
+ }
29
+ // aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
30
+ inline at::Tensor & ge_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) {
31
+ return at::_ops::ge_Scalar_out::call(self, other, out);
32
+ }
33
+
34
+ // aten::ge.Scalar(Tensor self, Scalar other) -> Tensor
35
+ inline at::Tensor ge(const at::Tensor & self, const at::Scalar & other) {
36
+ return at::_ops::ge_Scalar::call(self, other);
37
+ }
38
+
39
+ // aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
40
+ inline at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
41
+ return at::_ops::ge_Tensor_out::call(self, other, out);
42
+ }
43
+ // aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
44
+ inline at::Tensor & ge_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
45
+ return at::_ops::ge_Tensor_out::call(self, other, out);
46
+ }
47
+
48
+ // aten::ge.Tensor(Tensor self, Tensor other) -> Tensor
49
+ inline at::Tensor ge(const at::Tensor & self, const at::Tensor & other) {
50
+ return at::_ops::ge_Tensor::call(self, other);
51
+ }
52
+
53
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge_cpu_dispatch.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API at::Tensor ge(const at::Tensor & self, const at::Scalar & other);
21
+ TORCH_API at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other);
22
+ TORCH_API at::Tensor & ge_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out);
23
+ TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Scalar & other);
24
+ TORCH_API at::Tensor ge(const at::Tensor & self, const at::Tensor & other);
25
+ TORCH_API at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
26
+ TORCH_API at::Tensor & ge_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
27
+ TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Tensor & other);
28
+
29
+ } // namespace cpu
30
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_compositeexplicitautogradnonfunctional_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautogradnonfunctional {
19
+
20
+ TORCH_API at::Tensor gelu_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate="none");
21
+
22
+ } // namespace compositeexplicitautogradnonfunctional
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hsplit_ops.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API hsplit_int {
18
+ using schema = ::std::vector<at::Tensor> (const at::Tensor &, int64_t);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::hsplit")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "int")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]")
24
+ static ::std::vector<at::Tensor> call(const at::Tensor & self, int64_t sections);
25
+ static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t sections);
26
+ };
27
+
28
+ struct TORCH_API hsplit_array {
29
+ using schema = ::std::vector<at::Tensor> (const at::Tensor &, at::IntArrayRef);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::hsplit")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "array")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]")
35
+ static ::std::vector<at::Tensor> call(const at::Tensor & self, at::IntArrayRef indices);
36
+ static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef indices);
37
+ };
38
+
39
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/indices.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/indices_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+
26
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/item_native.h ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Scalar item(const at::Tensor & self);
20
+ } // namespace native
21
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kl_div.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/kl_div_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
26
+ inline at::Tensor kl_div(const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean, bool log_target=false) {
27
+ return at::_ops::kl_div::call(self, target, reduction, log_target);
28
+ }
29
+
30
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kthvalue_cpu_dispatch.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> kthvalue_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t k, int64_t dim=-1, bool keepdim=false);
21
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> kthvalue_outf(const at::Tensor & self, int64_t k, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices);
22
+
23
+ } // namespace cpu
24
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API at::Tensor lgamma(const at::Tensor & self);
21
+ TORCH_API at::Tensor & lgamma_out(at::Tensor & out, const at::Tensor & self);
22
+ TORCH_API at::Tensor & lgamma_outf(const at::Tensor & self, at::Tensor & out);
23
+ TORCH_API at::Tensor & lgamma_(at::Tensor & self);
24
+
25
+ } // namespace cpu
26
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/linalg_matmul_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::linalg_matmul(Tensor self, Tensor other) -> Tensor
26
+ inline at::Tensor linalg_matmul(const at::Tensor & self, const at::Tensor & other) {
27
+ return at::_ops::linalg_matmul::call(self, other);
28
+ }
29
+
30
+ // aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
31
+ inline at::Tensor & linalg_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
32
+ return at::_ops::linalg_matmul_out::call(self, other, out);
33
+ }
34
+ // aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
35
+ inline at::Tensor & linalg_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
36
+ return at::_ops::linalg_matmul_out::call(self, other, out);
37
+ }
38
+
39
+ }