koichi12 committed on
Commit
1635328
·
verified ·
1 Parent(s): 1105a93

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc +0 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h +22 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h +46 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h +26 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h +183 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h +69 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h +53 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h +2 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h +153 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h +243 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h +26 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h +21 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h +28 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h +23 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h +21 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h +25 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h +27 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h +25 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h +39 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h +39 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h +22 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h +23 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h +28 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h +27 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h +30 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h +28 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h +27 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h +50 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h +28 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h +23 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h +91 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h +22 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h +22 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h +44 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h +33 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h +23 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h +39 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h +23 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h +25 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h +25 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h +26 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h +22 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h +27 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h +25 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h +30 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h +39 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h +28 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/kernel/__pycache__/bmm.cpython-311.pyc ADDED
Binary file (5.36 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Config.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // Test these using #if AT_MKL_ENABLED(), not #ifdef, so that it's
4
+ // obvious if you forgot to include Config.h
5
+ // c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined
6
+ //
7
+ // DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h
8
+
9
+ #define AT_MKLDNN_ENABLED() 1
10
+ #define AT_MKLDNN_ACL_ENABLED() 0
11
+ #define AT_MKL_ENABLED() 1
12
+ #define AT_MKL_SEQUENTIAL() 0
13
+ #define AT_POCKETFFT_ENABLED() 0
14
+ #define AT_NNPACK_ENABLED() 1
15
+ #define CAFFE2_STATIC_LINK_CUDA() 0
16
+ #define AT_BUILD_WITH_BLAS() 1
17
+ #define AT_BUILD_WITH_LAPACK() 1
18
+ #define AT_PARALLEL_OPENMP 1
19
+ #define AT_PARALLEL_NATIVE 0
20
+ #define AT_PARALLEL_NATIVE_TBB 0
21
+ #define AT_BLAS_F2C() 0
22
+ #define AT_BLAS_USE_CBLAS_DOT() 0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/FuncTorchTLS.h ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <c10/macros/Macros.h>
4
+ #include <memory>
5
+
6
+ namespace at::functorch {
7
+
8
+ // NOTE [functorch TLS in pytorch/pytorch]
9
+ //
10
+ // functorch lives out-of-tree. However, it has some TLS that needs to be
11
+ // propagated. The solution for that is we store a pointer to the TLS
12
+ // inside pytorch/pytorch and extend FuncTorchTLSBase inside functorch to
13
+ // include whatever functorch needs.
14
+ //
15
+ // We need to store a pointer due to the indirection:
16
+ // inside functorch, we will create a subclass of FuncTorchTLSBase called
17
+ // FuncTorchTLSImpl that actually contains metadata, like the DynamicLayerStack.
18
+ // FuncTorchTLSBase doesn't have any metadata because it hasn't been defined
19
+ // yet.
20
+ //
21
+ // Here in pytorch/pytorch, we will pass around FuncTorchTLSBase*, but inside
22
+ // functorch, we will assign a FuncTorchTLSImpl* to the FuncTorchTLSBase*.
23
+ // We can't directly pass around FuncTorchTLSBase (without a pointer) because
24
+ // FuncTorchTLSImpl does not fit inside a FuncTorchTLSBase by virtue of having
25
+ // more elements.
26
+ struct TORCH_API FuncTorchTLSBase {
27
+ virtual ~FuncTorchTLSBase() = default;
28
+ virtual std::unique_ptr<FuncTorchTLSBase> deepcopy() const = 0;
29
+
30
+ virtual int64_t checkSupportsSingleLevelAutogradFunction() const = 0;
31
+ virtual void checkSupportsCppAutogradFunction() const = 0;
32
+ virtual void checkSupportsInplaceRequiresGrad() const = 0;
33
+ virtual void checkSupportsRetainGrad() const = 0;
34
+ };
35
+
36
+ // returns deepcopy of the functorch tls
37
+ TORCH_API std::unique_ptr<FuncTorchTLSBase> getCopyOfFuncTorchTLS();
38
+
39
+ // sets the functorch tls. always does a deep copy.
40
+ TORCH_API void setFuncTorchTLS(
41
+ const std::shared_ptr<const FuncTorchTLSBase>& state);
42
+
43
+ // get a mutable reference to the functorch tls
44
+ TORCH_API std::unique_ptr<FuncTorchTLSBase>& functorchTLSAccessor();
45
+
46
+ } // namespace at::functorch
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapMode.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <c10/core/impl/LocalDispatchKeySet.h>
4
+
5
+ namespace at::impl {
6
+
7
+ // VmapMode contains a thread local count of how many nested vmaps
8
+ // we are currently inside. That number is known as the `vmap level`.
9
+ // VmapMode is used in the implementation of the Python `torch.vmap` API.
10
+ //
11
+ // NOTE: this is NOT the c++ api for torch.vmap. That doesn't exist yet.
12
+
13
+ struct TORCH_API VmapMode {
14
+ // Returns the vmap level, aka the count of how many nested vmaps we're in.
15
+ static int64_t current_vmap_level();
16
+
17
+ // Increment the count of nested vmaps. If this causes the vmap level to be
18
+ // greater than 0, then it enables DispatchKey::VmapMode on all tensors.
19
+ static int64_t increment_nesting();
20
+
21
+ // Decrements the count of nested vmaps. If this causes the vmap level to be
22
+ // equal to 0, then it disables DispatchKey::VmapMode on all tensors.
23
+ static int64_t decrement_nesting();
24
+ };
25
+
26
+ } // namespace at::impl
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/LegacyVmapTransforms.h ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <ATen/LegacyBatchedTensorImpl.h>
4
+ #include <ATen/core/IListRef.h>
5
+
6
+ namespace at {
7
+
8
+ // This file contains abstractions used for transforming *logical* vmap
9
+ // arguments into *physical* arguments. (Keep reading for definitions of these
10
+ // terms).
11
+
12
+ // NOTE: [Logical vs physical args]
13
+ // Consider the following vmap.
14
+ // vmap(vmap(func, in_dims=(2,)), in_dims=(0,))(torch.ones(2, 3, 4))
15
+ // This would produce a BatchedTensor wrapping a Tensor of size [2, 3, 4],
16
+ // with batch dims 0 and 2:
17
+ // BatchedTensor(ones(2, 3, 4), bdims=[(lvl=1,dim=0),(lvl=2,dim=2)])
18
+ //
19
+ // We say the *logical* view of the tensor has size [3] -- tensors inside
20
+ // `func` appear to have size [3].
21
+ // However, the *physical* underlying tensor (the one passed to vmap) has size
22
+ // [2, 3, 4].
23
+ //
24
+ // This notion of logical vs physical also extends to non-tensor arguments.
25
+ // Consider the previous tensor; let's assume the user called
26
+ // `torch.sum(tensor, dim=0)` inside of `func`. Then the logical
27
+ // dimension they are reducing over is dim 0 but the physical dim is dim 1
28
+ // (the first non-batch dimension)
29
+
30
+ // Forward declared; see NOTE: [What is a VmapPhysicalView?]
31
+ struct VmapPhysicalView;
32
+
33
+ // Most PyTorch operators take 4 or fewer inputs.
34
+ constexpr int64_t kVmapTransformStaticInputSize = 4;
35
+ using VmapPhysicalViewVec =
36
+ SmallVector<VmapPhysicalView, kVmapTransformStaticInputSize>;
37
+
38
+ // Pytorch generally advertises good performance for <= 5 dims.
39
+ // (see ATen/core/DimVector.h). We add a few extra dims (~3) for vmap
40
+ // dimensions to get 8. Adjust this number as necessary
41
+ constexpr int64_t kVmapStaticDimVecSize = 8;
42
+ using VmapDimVector = SmallVector<int64_t, kVmapStaticDimVecSize>;
43
+ using VmapSymDimVector = SmallVector<c10::SymInt, kVmapStaticDimVecSize>;
44
+
45
+ // NOTE: [What is an VmapTransform?]
46
+ // A *VmapTransform* converts logical views of tensors to physical views.
47
+ //
48
+ // Batching rules use VmapTransforms to convert logical arguments to
49
+ // physical arguments, then call one or more at:: operator that handles the
50
+ // physical arguments, and then converts the physical result back to a logical
51
+ // argument.
52
+
53
+ // VmapTransform for operators that take tensors with multiple batch dims.
54
+ // Given one or more logical views on Tensors, `logicalToPhysical`
55
+ // permutes all of the batch dims to the front of the tensor, aligns
56
+ // and expands the batch dims to match each other (according to their `level`),
57
+ // and returns a VmapPhysicalView on the tensor(s).
58
+ struct TORCH_API MultiBatchVmapTransform {
59
+ static VmapPhysicalView logicalToPhysical(const Tensor& logical_tensor);
60
+ static VmapPhysicalViewVec logicalToPhysical(ITensorListRef logical_tensors);
61
+ };
62
+
63
+ // VmapTransform for operators that broadcast all inputs.
64
+ // Given some logical views on Tensors, `logicalToPhysical`:
65
+ // - permutes all of the batch dims to the front of the tensors
66
+ // - aligns all the batch dims to the collective levels of all of the tensors.
67
+ // If a tensor does not have a batch dim for a vmap level, then it receives
68
+ // a size-one dimension for said level.
69
+ // - aligns the non-batch dims to have the same dimensionality, adding extra
70
+ // size-1 dimensions in between the batch dimensions and the non-batch
71
+ // dimensions so that the batch dimensions are lined up from the right.
72
+ //
73
+ // For example: given inputs of size (B, 2) and (B, 3, 2) where B is the batch
74
+ // dimension, BroadcastingVmapTransform returns VmapPhysicalViews that wrap
75
+ // tensors of size (B, 1, 2) and (B, 3, 2).
76
+ //
77
+ // Given inputs of size (B, 2) and (2,), BroadcastingVmapTransform returns
78
+ // VmapPhysicalViews wrapping tensors of size (B, 2) and (1, 2). We don't
79
+ // actually *need* to return a tensor of size (1, 2) for the second tensor
80
+ // because the broadcasting operation takes care of that for us, but we do
81
+ // it anyways to keep things simple.
82
+ struct TORCH_API BroadcastingVmapTransform {
83
+ static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors);
84
+ };
85
+
86
+ // Forward declared, if you're reading this file head to toe, don't worry about
87
+ // it yet.
88
+ struct VmapPhysicalToLogicalMap;
89
+
90
+ // NOTE: [What is a VmapPhysicalView?]
91
+ // VmapPhysicalView represents a physical view on a Tensor.
92
+ //
93
+ // One can use it to further convert logical dimension indices, logical shapes,
94
+ // and more to their physical variants, or convert a new (physical) tensor into
95
+ // a logical BatchedTensor. (TODO(rzou): some of these are not yet implemented).
96
+ //
97
+ // VmapPhysicalView stores a physical tensor with all of its batch dimensions at
98
+ // the front and some levels that correspond to said batch dimensions.
99
+ //
100
+ // The levels bitset specifies which vmap levels correspond to the batch
101
+ // dimensions at the front of the tensor. In particular, the number of set bits
102
+ // corresponds to the number of batch dimensions on `tensor` and the rightmost
103
+ // bit of `levels` specifies the maximum number of nested vmaps we are in at
104
+ // this point in time.
105
+ // For example, given:
106
+ // physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5, 6), levels={1, 3})
107
+ //
108
+ // Rightmost bit of `levels` is 3 indicating the number of nested vmaps less
109
+ // than or equal to 3.
110
+ // bitset: 010100
111
+ // ^
112
+ // |
113
+ // levels: 012345
114
+ struct TORCH_API VmapPhysicalView {
115
+ VmapPhysicalView(Tensor&& tensor, std::bitset<kVmapNumLevels> levels)
116
+ : levels_(levels), tensor_(std::move(tensor)) {
117
+ TORCH_INTERNAL_ASSERT(!isBatchedTensor(tensor_));
118
+ }
119
+
120
+ Tensor& tensor() {
121
+ return tensor_;
122
+ }
123
+ const Tensor& tensor() const {
124
+ return tensor_;
125
+ }
126
+
127
+ // Maps logical dim indices to physical dim indices. Also does dim wrapping.
128
+ //
129
+ // For example, given:
130
+ // physical_view = VmapPhysicalView(tensor=ones(2, 3, 4, 5), levels={1, 3})
131
+ //
132
+ // Then physical_view.getPhysicalDims({0, 1}) returns {2, 3}.
133
+ // This is because the size of levels tell us that the first two dimensions
134
+ // of `tensor_` are batch dimensions, so a logical dim of `n` is actually
135
+ // a physical dim of `n + 2`.
136
+ VmapDimVector getPhysicalDims(OptionalIntArrayRef logical_dims) const;
137
+ int64_t getPhysicalDim(int64_t logical_dim) const;
138
+
139
+ // Returns a VmapPhysicalToLogicalMap object. This can be used for
140
+ // mapping a physical tensor to a new logical tensor (BatchedTensor)
141
+ VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const;
142
+
143
+ // Maps a logical shape to a physical shape by pre-pending the batch
144
+ // sizes to the logical shape.
145
+ VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const;
146
+
147
+ int64_t numBatchDims() const;
148
+
149
+ private:
150
+ int64_t numLogicalDims() const;
151
+
152
+ std::bitset<kVmapNumLevels> levels_;
153
+ Tensor tensor_;
154
+ };
155
+
156
+ // Convenience struct used for mapping a physical tensor (a non-BatchedTensor)
157
+ // to a logical one (BatchedTensor). It holds some levels that are used to do
158
+ // the mapping and assumes that the batch dimensions in the physical tensor all
159
+ // occur at the front of the tensor.
160
+ struct TORCH_API VmapPhysicalToLogicalMap {
161
+ VmapPhysicalToLogicalMap(std::bitset<kVmapNumLevels> levels)
162
+ : levels_(levels) {}
163
+
164
+ // Maps a physical tensor to a new logical tensor (BatchedTensor).
165
+ // Assumes that all of the "batch dimensions" are at the front
166
+ // of the physical tensor. For example, given:
167
+ // - x = rank-4 Tensor with size 2, 3, 5, 7
168
+ // - levels = (2, 4)
169
+ // Returns:
170
+ // - BatchedTensor(x, bdims=[(dim=0,lvl=2), (dim=1, lvl=4)])
171
+ Tensor apply(const Tensor& physical_tensor) const;
172
+
173
+ // Given a vector of physical tensors,
174
+ // 1. maps each tensor to a new logical tensor. Assumes that all of the
175
+ // "batch dimensions" are at the front of the physical tensors.
176
+ // 2. stores the new logical tensors back into the passed-in vector. This is
177
+ // to avoid additional dynamic allocations.
178
+ void applyInplace(std::vector<Tensor>& physical_tensors) const;
179
+
180
+ std::bitset<kVmapNumLevels> levels_;
181
+ };
182
+
183
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/OpMathType.h ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <c10/core/ScalarType.h>
4
+ #include <c10/util/BFloat16.h>
5
+ #include <c10/util/Exception.h>
6
+ #include <c10/util/Float8_e4m3fn.h>
7
+ #include <c10/util/Float8_e4m3fnuz.h>
8
+ #include <c10/util/Float8_e5m2.h>
9
+ #include <c10/util/Float8_e5m2fnuz.h>
10
+ #include <c10/util/Half.h>
11
+
12
+ namespace at {
13
+
14
+ // For FP16 or BFloat16 inputs, ops should perform internal math in FP32.
15
+ template <typename scalar_t>
16
+ struct OpMathType {
17
+ using type = scalar_t;
18
+ };
19
+ template <>
20
+ struct OpMathType<at::Half> {
21
+ using type = float;
22
+ };
23
+ template <>
24
+ struct OpMathType<at::BFloat16> {
25
+ using type = float;
26
+ };
27
+ template <>
28
+ struct OpMathType<at::Float8_e5m2> {
29
+ using type = float;
30
+ };
31
+ template <>
32
+ struct OpMathType<at::Float8_e4m3fn> {
33
+ using type = float;
34
+ };
35
+ template <>
36
+ struct OpMathType<at::Float8_e5m2fnuz> {
37
+ using type = float;
38
+ };
39
+ template <>
40
+ struct OpMathType<at::Float8_e4m3fnuz> {
41
+ using type = float;
42
+ };
43
+ template <>
44
+ struct OpMathType<c10::complex<Half>> {
45
+ using type = c10::complex<float>;
46
+ };
47
+
48
+ template <typename T>
49
+ using opmath_type = typename OpMathType<T>::type;
50
+
51
+ namespace {
52
+
53
+ inline c10::ScalarType toOpMathType(const c10::ScalarType type) {
54
+ switch (type) {
55
+ #define DEFINE_CASE(scalar_t, TypeNum) \
56
+ case ScalarType::TypeNum: \
57
+ return CppTypeToScalarType<at::opmath_type<scalar_t>>::value;
58
+
59
+ AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE)
60
+ #undef DEFINE_CASE
61
+
62
+ default:
63
+ TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type);
64
+ }
65
+ }
66
+
67
+ } // namespace
68
+
69
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/RegistrationDeclarations.h ADDED
The diff for this file is too large to render. See raw diff
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ScalarOps.h ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <ATen/Tensor.h>
4
+ #include <c10/core/Scalar.h>
5
+
6
+ #ifndef AT_PER_OPERATOR_HEADERS
7
+ #include <ATen/Functions.h>
8
+ #else
9
+ #include <ATen/ops/scalar_tensor.h>
10
+ #endif
11
+
12
+ namespace at::detail {
13
+ // When filling a number to 1-element CPU tensor, we want to skip
14
+ // everything but manipulate data ptr directly.
15
+ // Ideally this fast path should be implemented in TensorIterator,
16
+ // but we also want to skip compute_types which is not avoidable
17
+ // in TensorIterator for now.
18
+ Tensor& scalar_fill(Tensor& self, const Scalar& value);
19
+ TORCH_API Tensor scalar_tensor_static(
20
+ const Scalar& s,
21
+ c10::optional<ScalarType> dtype_opt,
22
+ c10::optional<Device> device_opt);
23
+ } // namespace at::detail
24
+
25
+ // This is in the c10 namespace because we use ADL to find the functions in it.
26
+ namespace c10 {
27
+
28
+ // FIXME: this should be (and was) Scalar::toTensor, but there is currently no
29
+ // way to implement this without going through Derived Types (which are not part
30
+ // of core).
31
+ inline at::Tensor scalar_to_tensor(
32
+ const Scalar& s,
33
+ const Device device = at::kCPU) {
34
+ // This is the fast track we have for CPU scalar tensors.
35
+ if (device == at::kCPU) {
36
+ return at::detail::scalar_tensor_static(s, s.type(), at::kCPU);
37
+ }
38
+ return at::scalar_tensor(s, at::device(device).dtype(s.type()));
39
+ }
40
+
41
+ } // namespace c10
42
+
43
+ namespace at::native {
44
+
45
+ inline Tensor wrapped_scalar_tensor(
46
+ const Scalar& scalar,
47
+ const Device device = at::kCPU) {
48
+ auto tensor = scalar_to_tensor(scalar, device);
49
+ tensor.unsafeGetTensorImpl()->set_wrapped_number(true);
50
+ return tensor;
51
+ }
52
+
53
+ } // namespace at::native
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/Storage.h ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #pragma once
2
+ #include <c10/core/Storage.h>
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/WrapDimUtils.h ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <ATen/core/IListRef.h>
4
+ #include <ATen/core/Tensor.h>
5
+ #include <c10/core/TensorImpl.h>
6
+ #include <c10/core/WrapDimMinimal.h>
7
+ #include <c10/util/irange.h>
8
+
9
+ namespace at {
10
+
11
+ // if dim_post_expr is 0 and wrap_scalar is true, then dim must be in the
12
+ // range [-1, 0]. This is a special case for scalar tensors and manifests in
13
+ // e.g. torch.sum(scalar_tensor, 0). Otherwise, dim should be in the range
14
+ // [-dim_post_expr, dim_post_expr-1].
15
+ using c10::maybe_wrap_dim;
16
+
17
+ inline int64_t maybe_wrap_dim(int64_t dim, TensorImpl* tensor) {
18
+ return maybe_wrap_dim(dim, tensor->dim());
19
+ }
20
+
21
+ inline int64_t maybe_wrap_dim(int64_t dim, TensorList tensors) {
22
+ if (tensors.empty()) {
23
+ // can't wrap empty TensorList; rely on underlying implementation to throw
24
+ // error if necessary.
25
+ return dim;
26
+ }
27
+ return maybe_wrap_dim(dim, tensors[0].dim());
28
+ }
29
+
30
+ inline int64_t maybe_wrap_dim(
31
+ int64_t dim,
32
+ const std::vector<std::vector<int64_t>>& tensor_sizes) {
33
+ if (tensor_sizes.empty()) {
34
+ // can't wrap empty list; rely on underlying implementation to throw error
35
+ // if necessary
36
+ return dim;
37
+ }
38
+ return maybe_wrap_dim(dim, tensor_sizes[0].size());
39
+ }
40
+
41
+ // Given an array of dimensions `dims` of length `ndims`, this function "Wraps"
42
+ // each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
43
+ // specified using negative indices.
44
+ //
45
+ // Additionally, if `wrap_scalars` is true then scalar tensors with rank 0, will
46
+ // allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised for
47
+ // dimensions not in the range [-dim_post_expr, dim_post_expr).
48
+ inline void maybe_wrap_dims_n(
49
+ int64_t* dims,
50
+ int64_t ndims,
51
+ int64_t dim_post_expr,
52
+ bool wrap_scalars = true) {
53
+ if (dim_post_expr <= 0) {
54
+ if (wrap_scalars) {
55
+ dim_post_expr = 1; // this will make range [-1, 0]
56
+ } else {
57
+ TORCH_CHECK_INDEX(
58
+ ndims == 0,
59
+ "Dimension specified as ",
60
+ dims[0],
61
+ " but tensor has no dimensions");
62
+ return;
63
+ }
64
+ }
65
+ int64_t min = -dim_post_expr;
66
+ int64_t max = dim_post_expr - 1;
67
+ for (const auto i : c10::irange(ndims)) {
68
+ auto& dim = dims[i];
69
+ if (dim < min || dim > max) {
70
+ TORCH_CHECK_INDEX(
71
+ false,
72
+ "Dimension out of range (expected to be in range of [",
73
+ min,
74
+ ", ",
75
+ max,
76
+ "], but got ",
77
+ dim,
78
+ ")");
79
+ }
80
+ if (dim < 0)
81
+ dim += dim_post_expr;
82
+ }
83
+ }
84
+
85
+ // Given a contiguous container of dimensions `dims`, this function "Wraps"
86
+ // each dim in-place for a tensor of rank `dim_post_expr`, allowing dims to be
87
+ // specified using negative indices.
88
+ //
89
+ // Additionally, if `wrap_scalars` is true then scalar tensors with rank 0, will
90
+ // allow dimensions in the range [-1, 0]. Otherwise, an IndexError is raised for
91
+ // dimensions not in the range [-dim_post_expr, dim_post_expr).
92
+ template <typename Container>
93
+ inline void maybe_wrap_dims(
94
+ Container& dims,
95
+ int64_t dim_post_expr,
96
+ bool wrap_scalars = true) {
97
+ return maybe_wrap_dims_n(
98
+ dims.data(), dims.size(), dim_post_expr, wrap_scalars);
99
+ }
100
+
101
+ // previously, size [0] tensors were the only possible empty tensors; thus, it
102
+ // wasn't possible to cat empty tensors unless all the other tensors were
103
+ // 1-dimensional, so we allowed these tensors to be "skipped" (both for wrap
104
+ // dimension behavior and dimension size checking). We maintain this behavior
105
+ // for backwards compatibility, but only for this specific size (i.e. other
106
+ // empty sizes are not skipped).
107
+ template <typename T>
108
+ inline int64_t _legacy_cat_wrap_dim(
109
+ int64_t dim,
110
+ const std::vector<std::vector<T>>& tensor_sizes) {
111
+ for (auto& sizes : tensor_sizes) {
112
+ if (sizes.size() == 1 && sizes[0] == 0) {
113
+ continue;
114
+ }
115
+ return maybe_wrap_dim(dim, sizes.size());
116
+ }
117
+ return dim;
118
+ }
119
+
120
+ inline int64_t legacy_cat_wrap_dim(
121
+ int64_t dim,
122
+ const std::vector<std::vector<int64_t>>& tensor_sizes) {
123
+ return _legacy_cat_wrap_dim<int64_t>(dim, tensor_sizes);
124
+ }
125
+
126
+ inline int64_t legacy_cat_wrap_dim_symint(
127
+ int64_t dim,
128
+ const std::vector<std::vector<c10::SymInt>>& tensor_sizes) {
129
+ return _legacy_cat_wrap_dim<c10::SymInt>(dim, tensor_sizes);
130
+ }
131
+
132
+ inline int64_t legacy_cat_wrap_dim(
133
+ int64_t dim,
134
+ const MaterializedITensorListRef& tensors) {
135
+ for (const Tensor& tensor : tensors) {
136
+ if (tensor.dim() == 1 && tensor.sizes()[0] == 0) {
137
+ continue;
138
+ }
139
+ return maybe_wrap_dim(dim, tensor.dim());
140
+ }
141
+ return dim;
142
+ }
143
+
144
+ // wrap negative dims in a vector
145
+ inline void wrap_all_dims(
146
+ std::vector<int64_t>& dims_to_wrap,
147
+ int64_t tensor_total_dims) {
148
+ for (const auto i : c10::irange(dims_to_wrap.size())) {
149
+ dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims);
150
+ }
151
+ }
152
+
153
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/code_template.h ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <c10/util/irange.h>
4
+
5
+ #include <sstream>
6
+ #include <string>
7
+ #include <unordered_map>
8
+ #include <vector>
9
+
10
+ namespace at::jit {
11
+
12
+ // A template environment is a mapping from template variable names, e.g.,
13
+ // identifier (corresponding to $identifier) to their expansions.
14
+ //
15
+ // This template environment supports storing strings, numbers and lists
16
+ // of strings, and can be chained together (so that lookup proceeds in
17
+ // the top level environment, and then recurses into a parent
18
+ // environment if the key is not found.)
19
+ struct TemplateEnv {
20
+ TemplateEnv() = default;
21
+ TemplateEnv(TemplateEnv& parent) : parent(&parent) {}
22
+
23
+ using string_list = std::vector<std::string>;
24
+
25
+ // Add a string 'v' to the map at key 'k'.
26
+ void s(const std::string& k, const std::string& v) {
27
+ strings_[k] = v;
28
+ lists_.erase(k);
29
+ }
30
+
31
+ // Add a number 'v' to the map at key 'k'
32
+ template <typename T>
33
+ void d(const std::string& k, const T& v) {
34
+ strings_[k] = c10::to_string(v);
35
+ lists_.erase(k);
36
+ }
37
+
38
+ // Retrieve the string representation of the value stored at 'k' from the map.
39
+ // Raises an exception if the key is not found.
40
+ const std::string& s(const std::string& k) const {
41
+ if (strings_.count(k) == 0) {
42
+ if (parent) {
43
+ return parent->s(k);
44
+ }
45
+ notFound(k);
46
+ }
47
+ return strings_.at(k);
48
+ }
49
+
50
+ // Store a list of strings 'v' in the map at 'k'.
51
+ void v(const std::string& k, const string_list& v) {
52
+ lists_[k] = v;
53
+ strings_.erase(k);
54
+ }
55
+
56
+ // Retrieve a list of strings stored at 'k' from the map.
57
+ // Raises an exception if the key is not found.
58
+ const string_list& v(const std::string& k) const {
59
+ if (lists_.count(k) == 0) {
60
+ if (parent) {
61
+ return parent->v(k);
62
+ }
63
+ notFound(k);
64
+ }
65
+ return lists_.at(k);
66
+ }
67
+
68
+ // Test if a string 'k' is a string (as opposed to a list.)
69
+ bool keyIsString(const std::string& k) const {
70
+ if (strings_.count(k) > 0)
71
+ return true;
72
+ if (lists_.count(k) > 0)
73
+ return false;
74
+ if (parent)
75
+ return parent->keyIsString(k);
76
+ notFound(k);
77
+ }
78
+
79
+ private:
80
+ [[noreturn]] void notFound(const std::string& k) const {
81
+ std::stringstream ss;
82
+ ss << "key not found: " << k;
83
+ throw std::logic_error(ss.str());
84
+ }
85
+
86
+ std::unordered_map<std::string, std::string> strings_;
87
+ std::unordered_map<std::string, string_list> lists_;
88
+ TemplateEnv* parent{nullptr};
89
+ };
90
+
91
+ /*
92
+ # Match $identifier or ${identifier} and replace with the value in env.
93
+ # If this identifier is at the beginning of whitespace on a line
94
+ # and its value is a list then it is treated as
95
+ # block substitution by indenting all lines of all elements.
96
+ # If the identifier is on a line starting with non-whitespace and a list
97
+ # then it is comma separated. ${,foo} will insert a comma before the list
98
+ # if this list is not empty and ${foo,} will insert one after.
99
+ */
100
+ struct CodeTemplate {
101
+ /* implicit */ CodeTemplate(std::string t) : template_text(std::move(t)) {}
102
+
103
+ std::string format(const TemplateEnv& env) const {
104
+ std::stringstream out;
105
+ size_t pos = 0;
106
+ size_t indent = 0;
107
+ bool all_whitespace = true;
108
+ while (pos < template_text.size()) {
109
+ char c = template_text[pos];
110
+ if (c == '$') {
111
+ std::stringstream kss;
112
+ // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
113
+ bool comma_before;
114
+ // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
115
+ bool comma_after;
116
+ size_t new_pos = parseKey(pos, kss, comma_before, comma_after);
117
+ std::string k = kss.str();
118
+ bool is_string = env.keyIsString(k);
119
+ if (all_whitespace) {
120
+ if (is_string)
121
+ emitStringWithIndents(out, indent, env.s(k));
122
+ else
123
+ emitLinesIndented(out, indent, env.v(k));
124
+ } else {
125
+ if (is_string)
126
+ out << env.s(k);
127
+ else
128
+ emitCommaSeparatedList(out, env.v(k), comma_before, comma_after);
129
+ }
130
+ all_whitespace = false;
131
+ pos = new_pos;
132
+ } else {
133
+ out << c;
134
+ if (!isspace(c))
135
+ all_whitespace = false;
136
+ indent++;
137
+ if (c == '\n') {
138
+ indent = 0;
139
+ all_whitespace = true;
140
+ }
141
+ pos++;
142
+ }
143
+ }
144
+ return out.str();
145
+ }
146
+
147
+ private:
148
+ using string_list = std::vector<std::string>;
149
+ char charAt(size_t p) const {
150
+ if (p >= template_text.size())
151
+ throw std::logic_error("EOS found in key");
152
+ return template_text[p];
153
+ }
154
+ size_t parseKey(
155
+ size_t pos,
156
+ std::ostream& k,
157
+ bool& comma_before,
158
+ bool& comma_after) const {
159
+ comma_before = false;
160
+ comma_after = false;
161
+ pos++;
162
+ if (charAt(pos) == '{') {
163
+ pos++;
164
+ if (charAt(pos) == ',') {
165
+ comma_before = true;
166
+ pos++;
167
+ }
168
+ pos = parseIdent(pos, k);
169
+ if (charAt(pos) == ',') {
170
+ comma_after = true;
171
+ pos++;
172
+ }
173
+ if (charAt(pos) != '}')
174
+ throw std::logic_error("missing terminating '}'");
175
+ pos++;
176
+ return pos;
177
+ } else {
178
+ return parseIdent(pos, k);
179
+ }
180
+ }
181
+ size_t parseIdent(size_t pos, std::ostream& k) const {
182
+ while (pos < template_text.size() &&
183
+ (isalnum(template_text[pos]) || template_text[pos] == '_')) {
184
+ k << template_text[pos];
185
+ pos++;
186
+ }
187
+ return pos;
188
+ }
189
+ void emitCommaSeparatedList(
190
+ std::ostream& out,
191
+ const string_list& strings,
192
+ bool comma_before,
193
+ bool comma_after) const {
194
+ if (comma_before && !strings.empty())
195
+ out << ", ";
196
+ for (const auto i : c10::irange(strings.size())) {
197
+ if (i > 0)
198
+ out << ", ";
199
+ out << strings[i];
200
+ }
201
+ if (comma_after && !strings.empty())
202
+ out << ", ";
203
+ }
204
+ // These indentation functions follow the convention that they never emit
205
+ // leading or trailing newlines when the input string does not have leading
206
+ // or trailing newlines. It's the responsibility of the calling function
207
+ // to indent correctly in the context.
208
+ void emitIndent(std::ostream& out, size_t indent) const {
209
+ for (C10_UNUSED const auto i : c10::irange(indent)) {
210
+ out << " ";
211
+ }
212
+ }
213
+ void emitStringWithIndents(
214
+ std::ostream& out,
215
+ size_t indent,
216
+ const std::string& str) const {
217
+ for (auto c : str) {
218
+ out << c;
219
+ if (c == '\n') {
220
+ emitIndent(out, indent);
221
+ }
222
+ }
223
+ }
224
+ void emitLinesIndented(
225
+ std::stringstream& out,
226
+ size_t indent,
227
+ const string_list& strings) const {
228
+ for (const auto i : c10::irange(strings.size())) {
229
+ if (i > 0)
230
+ emitIndent(out, indent);
231
+ emitStringWithIndents(out, indent, strings[i]);
232
+ if (i + 1 != strings.size())
233
+ out << "\n";
234
+ }
235
+ }
236
+ std::string template_text;
237
+ };
238
+
239
+ static inline std::string format(const std::string& fmt, TemplateEnv& env) {
240
+ return CodeTemplate(fmt).format(env);
241
+ }
242
+
243
+ } // namespace at::jit
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_adaptive_avg_pool2d_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API at::Tensor & _adaptive_avg_pool2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size);
21
+ TORCH_API at::Tensor & _adaptive_avg_pool2d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out);
22
+ TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size);
23
+ TORCH_API at::Tensor & _adaptive_avg_pool2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out);
24
+
25
+ } // namespace compositeexplicitautograd
26
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cast_Short_native.h ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor _cast_Short(const at::Tensor & self, bool non_blocking=false);
20
+ } // namespace native
21
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_ops.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API _debug_has_internal_overlap { // Operator descriptor for aten::_debug_has_internal_overlap: signature aliases, identifying strings, and static entry points (declared here, defined elsewhere).
18
+ using schema = int64_t (const at::Tensor &); // function type of the operator: takes a Tensor, returns int64_t
19
+ using ptr_schema = schema*; // pointer-to-function form of `schema`
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_debug_has_internal_overlap")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_debug_has_internal_overlap(Tensor self) -> int")
24
+ static int64_t call(const at::Tensor & self); // NOTE(review): presumably dispatches the op; implementation is not in this header
25
+ static int64_t redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); // same signature as call(), plus an explicit DispatchKeySet
26
+ };
27
+
28
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_tensor_affine_cuda_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor _fake_quantize_learnable_per_tensor_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor=1.0);
21
+
22
+ } // namespace cuda
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_flash_attention_backward_native.h ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _flash_attention_backward(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, c10::optional<double> scale=c10::nullopt);
20
+ } // namespace native
21
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_acos_native.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API void _foreach_acos_out(at::TensorList self, at::TensorList out);
20
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_acos_slow(at::TensorList self);
21
+ TORCH_API void foreach_tensor_acos_slow_(at::TensorList self);
22
+ TORCH_API ::std::vector<at::Tensor> foreach_tensor_acos_cuda(at::TensorList self);
23
+ TORCH_API void foreach_tensor_acos_cuda_(at::TensorList self);
24
+ } // namespace native
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_slogdet_meta.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeMetaFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/TensorIterator.h>
13
+ #include <ATen/TensorMeta.h>
14
+ #include <tuple>
15
+ #include <vector>
16
+
17
+ namespace at {
18
+ namespace meta {
19
+
20
+ struct TORCH_API structured__linalg_slogdet : public at::impl::MetaBase { // meta struct for _linalg_slogdet; derives from MetaBase and only declares meta() here
21
+
22
+
23
+ void meta(const at::Tensor & A); // declaration only; the definition is not in this header
24
+ };
25
+
26
+ } // namespace meta
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_linalg_svd_cpu_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _linalg_svd(const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional<c10::string_view> driver=c10::nullopt);
21
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _linalg_svd_out(at::Tensor & U, at::Tensor & S, at::Tensor & Vh, const at::Tensor & A, bool full_matrices=false, bool compute_uv=true, c10::optional<c10::string_view> driver=c10::nullopt);
22
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _linalg_svd_outf(const at::Tensor & A, bool full_matrices, bool compute_uv, c10::optional<c10::string_view> driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh);
23
+
24
+ } // namespace cpu
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_logcumsumexp.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_logcumsumexp_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::_logcumsumexp(Tensor self, int dim) -> Tensor
26
+ inline at::Tensor _logcumsumexp(const at::Tensor & self, int64_t dim) { // thin inline wrapper over the operator's call() entry point
27
+ return at::_ops::_logcumsumexp::call(self, dim); // arguments are forwarded unchanged
28
+ }
29
+
30
+ // aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
31
+ inline at::Tensor & _logcumsumexp_out(at::Tensor & out, const at::Tensor & self, int64_t dim) { // out-first spelling of the .out overload
32
+ return at::_ops::_logcumsumexp_out::call(self, dim, out); // `out` is moved to the last position to match the operator's argument order
33
+ }
34
+ // aten::_logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
35
+ inline at::Tensor & _logcumsumexp_outf(const at::Tensor & self, int64_t dim, at::Tensor & out) { // same overload as _logcumsumexp_out, with `out` as the last parameter
36
+ return at::_ops::_logcumsumexp_out::call(self, dim, out); // parameter order already matches the operator; forwarded as-is
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_native_multi_head_attention_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::_native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
26
+ inline ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) { // thin inline wrapper; C++ defaults mirror the schema defaults above
27
+ return at::_ops::_native_multi_head_attention::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type); // all arguments forwarded unchanged, in declaration order
28
+ }
29
+
30
+ // aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
31
+ inline ::std::tuple<at::Tensor &,at::Tensor &> _native_multi_head_attention_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt) { // out-first spelling of the .out overload; keeps the defaulted trailing arguments
32
+ return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1); // out0/out1 are moved to the end to match the operator's argument order
33
+ }
34
+ // aten::_native_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
35
+ inline ::std::tuple<at::Tensor &,at::Tensor &> _native_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type, at::Tensor & out0, at::Tensor & out1) { // same overload with outputs last; no parameter has a default in this spelling
36
+ return at::_ops::_native_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, need_weights, average_attn_weights, mask_type, out0, out1); // parameter order already matches the operator; forwarded as-is
37
+ }
38
+
39
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor & _nested_tensor_storage_offsets_out(const at::Tensor & self, at::Tensor & out);
20
+ TORCH_API at::Tensor _nested_tensor_storage_offsets(const at::Tensor & self);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out);
20
+ TORCH_API at::Tensor log_softmax_backward_sparse_cpu(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self);
21
+ TORCH_API at::Tensor log_softmax_backward_sparse_cuda(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self);
22
+ } // namespace native
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_native.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor & _to_sparse_sparse_dim_out(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out);
20
+ TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, int64_t sparse_dim);
21
+ TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, int64_t sparse_dim);
22
+ TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, int64_t sparse_dim);
23
+ TORCH_API at::Tensor & _to_sparse_out(const at::Tensor & self, c10::optional<at::Layout> layout, at::OptionalIntArrayRef blocksize, c10::optional<int64_t> dense_dim, at::Tensor & out);
24
+ TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
25
+ TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
26
+ TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, c10::optional<at::Layout> layout=c10::nullopt, at::OptionalIntArrayRef blocksize=c10::nullopt, c10::optional<int64_t> dense_dim=c10::nullopt);
27
+ } // namespace native
28
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeMetaFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/TensorIterator.h>
13
+ #include <ATen/TensorMeta.h>
14
+ #include <tuple>
15
+ #include <vector>
16
+
17
+ namespace at {
18
+ namespace meta {
19
+
20
+ struct TORCH_API structured__upsample_nearest_exact2d_backward : public at::impl::MetaBase { // meta struct for _upsample_nearest_exact2d_backward; derives from MetaBase and only declares meta() here
21
+
22
+
23
+ void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, c10::optional<double> scales_h, c10::optional<double> scales_w); // declaration only; the definition is not in this header
24
+ };
25
+
26
+ } // namespace meta
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/_validate_sparse_csr_tensor_args_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
26
+ inline void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size) {
27
+ return at::_ops::_validate_sparse_csr_tensor_args::call(crow_indices, col_indices, values, size);
28
+ }
29
+
30
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_as_ops.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API align_as { // Operator descriptor for aten::align_as: signature aliases, identifying strings, and static entry points (declared here, defined elsewhere).
18
+ using schema = at::Tensor (const at::Tensor &, const at::Tensor &); // function type of the operator: two Tensors in, one Tensor out
19
+ using ptr_schema = schema*; // pointer-to-function form of `schema`
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::align_as")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "align_as(Tensor self, Tensor other) -> Tensor")
24
+ static at::Tensor call(const at::Tensor & self, const at::Tensor & other); // NOTE(review): presumably dispatches the op; implementation is not in this header
25
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); // same signature as call(), plus an explicit DispatchKeySet
26
+ };
27
+
28
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/aminmax_meta.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeMetaFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/TensorIterator.h>
13
+ #include <ATen/TensorMeta.h>
14
+ #include <tuple>
15
+ #include <vector>
16
+
17
+ namespace at {
18
+ namespace meta {
19
+
20
+ struct TORCH_API structured_aminmax : public at::impl::MetaBase { // meta struct for aminmax; derives from MetaBase and only declares meta() here
21
+
22
+
23
+ void meta(const at::Tensor & self, c10::optional<int64_t> dim, bool keepdim); // declaration only; the definition is not in this header
24
+ };
25
+
26
+ } // namespace meta
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ceil_ops.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API ceil {
18
+ using schema = at::Tensor (const at::Tensor &);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil(Tensor self) -> Tensor")
24
+ static at::Tensor call(const at::Tensor & self);
25
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
26
+ };
27
+
28
+ struct TORCH_API ceil_ {
29
+ using schema = at::Tensor & (at::Tensor &);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil_")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil_(Tensor(a!) self) -> Tensor(a!)")
35
+ static at::Tensor & call(at::Tensor & self);
36
+ static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self);
37
+ };
38
+
39
+ struct TORCH_API ceil_out {
40
+ using schema = at::Tensor & (const at::Tensor &, at::Tensor &);
41
+ using ptr_schema = schema*;
42
+ // See Note [static constexpr char* members for windows NVCC]
43
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ceil")
44
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
45
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
46
+ static at::Tensor & call(const at::Tensor & self, at::Tensor & out);
47
+ static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out);
48
+ };
49
+
50
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/constant_pad_nd_compositeexplicitautograd_dispatch.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautograd {
19
+
20
+ TORCH_API at::Tensor constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0);
21
+ TORCH_API at::Tensor constant_pad_nd_symint(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0);
22
+ TORCH_API at::Tensor & constant_pad_nd_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value=0);
23
+ TORCH_API at::Tensor & constant_pad_nd_outf(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value, at::Tensor & out);
24
+ TORCH_API at::Tensor & constant_pad_nd_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value=0);
25
+ TORCH_API at::Tensor & constant_pad_nd_symint_outf(const at::Tensor & self, c10::SymIntArrayRef pad, const at::Scalar & value, at::Tensor & out);
26
+
27
+ } // namespace compositeexplicitautograd
28
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dot_cuda_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor dot(const at::Tensor & self, const at::Tensor & tensor);
21
+
22
+ } // namespace cuda
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfft.h ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/fft_hfft_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
26
+ inline at::Tensor fft_hfft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
27
+ return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
28
+ }
29
+ namespace symint {
30
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
31
+ at::Tensor fft_hfft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
32
+ return at::_ops::fft_hfft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
33
+ }
34
+ }
35
+
36
+ // aten::fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
37
+ inline at::Tensor fft_hfft_symint(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
38
+ return at::_ops::fft_hfft::call(self, n, dim, norm);
39
+ }
40
+ namespace symint {
41
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
42
+ at::Tensor fft_hfft(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
43
+ return at::_ops::fft_hfft::call(self, n, dim, norm);
44
+ }
45
+ }
46
+
47
+ // aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
48
+ inline at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
49
+ return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
50
+ }
51
+ namespace symint {
52
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
53
+ at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
54
+ return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
55
+ }
56
+ }
57
+
58
+ // aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
59
+ inline at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
60
+ return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
61
+ }
62
+ namespace symint {
63
+ template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
64
+ at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
65
+ return at::_ops::fft_hfft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
66
+ }
67
+ }
68
+
69
+ // aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
70
+ inline at::Tensor & fft_hfft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
71
+ return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
72
+ }
73
+ namespace symint {
74
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
75
+ at::Tensor & fft_hfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
76
+ return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
77
+ }
78
+ }
79
+
80
+ // aten::fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
81
+ inline at::Tensor & fft_hfft_symint_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
82
+ return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
83
+ }
84
+ namespace symint {
85
+ template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
86
+ at::Tensor & fft_hfft_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
87
+ return at::_ops::fft_hfft_out::call(self, n, dim, norm, out);
88
+ }
89
+ }
90
+
91
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor fft_irfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt);
20
+ TORCH_API at::Tensor & fft_irfft2_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftn_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API at::Tensor fft_rfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt);
20
+ TORCH_API at::Tensor & fft_rfftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fix.h ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/fix_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::fix(Tensor self) -> Tensor
26
+ inline at::Tensor fix(const at::Tensor & self) {
27
+ return at::_ops::fix::call(self);
28
+ }
29
+
30
+ // aten::fix_(Tensor(a!) self) -> Tensor(a!)
31
+ inline at::Tensor & fix_(at::Tensor & self) {
32
+ return at::_ops::fix_::call(self);
33
+ }
34
+
35
+ // aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
36
+ inline at::Tensor & fix_out(at::Tensor & out, const at::Tensor & self) {
37
+ return at::_ops::fix_out::call(self, out);
38
+ }
39
+ // aten::fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
40
+ inline at::Tensor & fix_outf(const at::Tensor & self, at::Tensor & out) {
41
+ return at::_ops::fix_out::call(self, out);
42
+ }
43
+
44
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeimplicitautograd {
19
+
20
+ TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Tensor & exponent);
21
+ TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & exponent);
22
+ TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out);
23
+ TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Tensor & exponent);
24
+ TORCH_API at::Tensor float_power(const at::Scalar & self, const at::Tensor & exponent);
25
+ TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & exponent);
26
+ TORCH_API at::Tensor & float_power_outf(const at::Scalar & self, const at::Tensor & exponent, at::Tensor & out);
27
+ TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Scalar & exponent);
28
+ TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & exponent);
29
+ TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Scalar & exponent, at::Tensor & out);
30
+ TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Scalar & exponent);
31
+
32
+ } // namespace compositeimplicitautograd
33
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/geometric_cpu_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API at::Tensor & geometric_(at::Tensor & self, double p, c10::optional<at::Generator> generator=c10::nullopt);
21
+
22
+ } // namespace cpu
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_backward_ops.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API glu_backward_grad_input {
18
+ using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)")
24
+ static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
25
+ static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim, at::Tensor & grad_input);
26
+ };
27
+
28
+ struct TORCH_API glu_backward {
29
+ using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::glu_backward")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor")
35
+ static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
36
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, int64_t dim);
37
+ };
38
+
39
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardshrink_native.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+ #include <ATen/ops/hardshrink_meta.h>
16
+
17
+ namespace at {
18
+ namespace native {
19
+ struct TORCH_API structured_hardshrink_out : public at::meta::structured_hardshrink {
20
+ void impl(const at::Tensor & self, const at::Scalar & lambd, const at::Tensor & out);
21
+ };
22
+ } // namespace native
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hardtanh_backward_cpu_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API at::Tensor hardtanh_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
21
+ TORCH_API at::Tensor & hardtanh_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val);
22
+ TORCH_API at::Tensor & hardtanh_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & grad_input);
23
+
24
+ } // namespace cpu
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isposinf_cuda_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor isposinf(const at::Tensor & self);
21
+ TORCH_API at::Tensor & isposinf_out(at::Tensor & out, const at::Tensor & self);
22
+ TORCH_API at::Tensor & isposinf_outf(const at::Tensor & self, at::Tensor & out);
23
+
24
+ } // namespace cuda
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/le_compositeexplicitautogradnonfunctional_dispatch.h ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautogradnonfunctional {
19
+
20
+ TORCH_API at::Tensor le(const at::Tensor & self, const at::Scalar & other);
21
+ TORCH_API at::Tensor & le_(at::Tensor & self, const at::Scalar & other);
22
+ TORCH_API at::Tensor le(const at::Tensor & self, const at::Tensor & other);
23
+ TORCH_API at::Tensor & le_(at::Tensor & self, const at::Tensor & other);
24
+
25
+ } // namespace compositeexplicitautogradnonfunctional
26
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace compositeexplicitautogradnonfunctional {
19
+
20
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor> linalg_qr(const at::Tensor & A, c10::string_view mode="reduced");
21
+
22
+ } // namespace compositeexplicitautogradnonfunctional
23
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svd_native.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/core/Tensor.h>
13
+ #include <tuple>
14
+ #include <vector>
15
+
16
+
17
+ namespace at {
18
+ namespace native {
19
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> linalg_svd(const at::Tensor & A, bool full_matrices=true, c10::optional<c10::string_view> driver=c10::nullopt);
20
+ TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> linalg_svd_out(const at::Tensor & A, bool full_matrices, c10::optional<c10::string_view> driver, at::Tensor & U, at::Tensor & S, at::Tensor & Vh);
21
+ } // namespace native
22
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_meta.h ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from NativeMetaFunction.h
4
+
5
+ #include <c10/core/Scalar.h>
6
+ #include <c10/core/Storage.h>
7
+ #include <c10/core/TensorOptions.h>
8
+ #include <c10/util/Deprecated.h>
9
+ #include <c10/util/Optional.h>
10
+ #include <c10/core/QScheme.h>
11
+ #include <ATen/core/Reduction.h>
12
+ #include <ATen/TensorIterator.h>
13
+ #include <ATen/TensorMeta.h>
14
+ #include <tuple>
15
+ #include <vector>
16
+
17
+ namespace at {
18
+ namespace meta {
19
+
20
+ struct TORCH_API structured_linalg_vector_norm : public at::impl::MetaBase {
21
+
22
+
23
+ void meta(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype);
24
+ };
25
+
26
+ } // namespace native
27
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logaddexp2_cuda_dispatch.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cuda {
19
+
20
+ TORCH_API at::Tensor logaddexp2(const at::Tensor & self, const at::Tensor & other);
21
+ TORCH_API at::Tensor & logaddexp2_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
22
+ TORCH_API at::Tensor & logaddexp2_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
23
+
24
+ } // namespace cuda
25
+ } // namespace at
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool1d_with_indices.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Function.h
4
+
5
+ #include <ATen/Context.h>
6
+ #include <ATen/DeviceGuard.h>
7
+ #include <ATen/TensorUtils.h>
8
+ #include <ATen/TracerMode.h>
9
+ #include <ATen/core/Generator.h>
10
+ #include <ATen/core/Reduction.h>
11
+ #include <ATen/core/Tensor.h>
12
+ #include <c10/core/Scalar.h>
13
+ #include <c10/core/Storage.h>
14
+ #include <c10/core/TensorOptions.h>
15
+ #include <c10/util/Deprecated.h>
16
+ #include <c10/util/Optional.h>
17
+
18
+
19
+
20
+ #include <ATen/ops/max_pool1d_with_indices_ops.h>
21
+
22
+ namespace at {
23
+
24
+
25
+ // aten::max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
26
+ inline ::std::tuple<at::Tensor,at::Tensor> max_pool1d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) {
27
+ return at::_ops::max_pool1d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode);
28
+ }
29
+
30
+ }
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_ops.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API max_pool2d_with_indices_out {
18
+ using schema = ::std::tuple<at::Tensor &,at::Tensor &> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool, at::Tensor &, at::Tensor &);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))")
24
+ static ::std::tuple<at::Tensor &,at::Tensor &> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
25
+ static ::std::tuple<at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices);
26
+ };
27
+
28
+ struct TORCH_API max_pool2d_with_indices {
29
+ using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool);
30
+ using ptr_schema = schema*;
31
+ // See Note [static constexpr char* members for windows NVCC]
32
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool2d_with_indices")
33
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
34
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)")
35
+ static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
36
+ static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode);
37
+ };
38
+
39
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/miopen_convolution_relu_ops.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // @generated by torchgen/gen.py from Operator.h
4
+
5
+ #include <tuple>
6
+ #include <vector>
7
+
8
+ // Forward declarations of any types needed in the operator signatures.
9
+ // We can't directly include these classes because it will cause circular include dependencies.
10
+ // This file is included by TensorBody.h, which defines the Tensor class.
11
+ #include <ATen/core/ATen_fwd.h>
12
+
13
+ namespace at {
14
+ namespace _ops {
15
+
16
+
17
+ struct TORCH_API miopen_convolution_relu {
18
+ using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt);
19
+ using ptr_schema = schema*;
20
+ // See Note [static constexpr char* members for windows NVCC]
21
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::miopen_convolution_relu")
22
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
23
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor")
24
+ static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
25
+ static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups);
26
+ };
27
+
28
+ }} // namespace at::_ops
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_rnn_layer_backward_cpu_dispatch.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // @generated by torchgen/gen.py from DispatchKeyFunction.h
3
+
4
+ // NB: The implementing C++ file is RegisterDispatchKey.cpp
5
+
6
+ // The only #includes we need are for custom classes that have defaults in the C++ API
7
+ #include <c10/core/MemoryFormat.h>
8
+ #include <c10/core/Scalar.h>
9
+ #include <ATen/core/Reduction.h>
10
+
11
+ // Forward declarations of any types needed in the operator signatures.
12
+ // We can't directly include these classes because it will cause circular include dependencies.
13
+ // This file is included by TensorBody.h, which defines the Tensor class.
14
+ #include <ATen/core/ATen_fwd.h>
15
+
16
+ namespace at {
17
+
18
+ namespace cpu {
19
+
20
+ TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> mkldnn_rnn_layer_backward(const at::Tensor & input, const at::Tensor & weight1, const at::Tensor & weight2, const at::Tensor & weight3, const at::Tensor & weight4, const at::Tensor & hx_, const at::Tensor & cx_tmp, const at::Tensor & output, const at::Tensor & hy_, const at::Tensor & cy_, const c10::optional<at::Tensor> & grad_output, const c10::optional<at::Tensor> & grad_hy, const c10::optional<at::Tensor> & grad_cy, bool reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes, bool batch_first, const at::Tensor & workspace);
21
+
22
+ } // namespace cpu
23
+ } // namespace at