koichi12 commited on Feb 12, 2025

Commit

0b443f9

verified ·

1 Parent(s): 25bcc66

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_cuda_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_affine_quantized_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cpu_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cuda_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_frac_ops.h +50 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_mul_native.h +40 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_trunc_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_zero_compositeexplicitautograd_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mkldnn_reshape.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_get_values_native.h +20 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_print_compositeexplicitautograd_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unsafe_index_put.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h +27 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_version.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/add_meta.h +27 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/asin_cpu_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan_native.h +29 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_elemt.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_stats_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/binary_cross_entropy_backward_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bmm_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/channel_shuffle.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/col2im_native.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/crow_indices.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dist_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/embedding_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/erfc_meta_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_fft.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gather_cuda_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge.h +53 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge_cpu_dispatch.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hsplit_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/indices.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/item_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kl_div.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kthvalue_cpu_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul.h +39 -0

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, at::TensorOptions options);
+TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_affine_quantized_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _empty_affine_quantized_out_symint(c10::SymIntArrayRef size, double scale, int64_t zero_point, c10::optional<at::MemoryFormat> memory_format, at::Tensor & out);
+TORCH_API at::Tensor empty_affine_quantized_other_backends_stub(at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}, double scale=1, int64_t zero_point=0, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
+TORCH_API at::Tensor empty_affine_quantized(at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}, double scale=1, int64_t zero_point=0, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API ::std::vector<at::Tensor> _foreach_expm1(at::TensorList self);
+TORCH_API void _foreach_expm1_(at::TensorList self);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_expm1_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API ::std::vector<at::Tensor> _foreach_expm1(at::TensorList self);
+TORCH_API void _foreach_expm1_(at::TensorList self);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_frac_ops.h ADDED Viewed

	@@ -0,0 +1,50 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API _foreach_frac {
+  using schema = ::std::vector<at::Tensor> (at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac(Tensor[] self) -> Tensor[]")
+  static ::std::vector<at::Tensor> call(at::TensorList self);
+  static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self);
+};
+struct TORCH_API _foreach_frac_ {
+  using schema = void (at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac_(Tensor(a!)[] self) -> ()")
+  static void call(at::TensorList self);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self);
+};
+struct TORCH_API _foreach_frac_out {
+  using schema = void (at::TensorList, at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_foreach_frac")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_foreach_frac.out(Tensor[] self, *, Tensor(a!)[] out) -> ()")
+  static void call(at::TensorList self, at::TensorList out);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_mul_native.h ADDED Viewed

	@@ -0,0 +1,40 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API void _foreach_mul_Scalar_out(at::TensorList self, const at::Scalar & scalar, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_slow(at::TensorList self, const at::Scalar & scalar);
+TORCH_API void foreach_tensor_mul_scalar_kernel_slow_(at::TensorList self, const at::Scalar & scalar);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_cuda(at::TensorList self, const at::Scalar & scalar);
+TORCH_API void foreach_tensor_mul_scalar_kernel_cuda_(at::TensorList self, const at::Scalar & scalar);
+TORCH_API void _foreach_mul_List_out(at::TensorList self, at::TensorList other, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_list_kernel_slow(at::TensorList self, at::TensorList other);
+TORCH_API void foreach_tensor_mul_list_kernel_slow_(at::TensorList self, at::TensorList other);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_list_kernel_cuda(at::TensorList self, at::TensorList other);
+TORCH_API void foreach_tensor_mul_list_kernel_cuda_(at::TensorList self, at::TensorList other);
+TORCH_API void _foreach_mul_ScalarList_out(at::TensorList self, at::ArrayRef<at::Scalar> scalars, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalarlist_kernel_slow(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
+TORCH_API void foreach_tensor_mul_scalarlist_kernel_slow_(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_scalarlist_kernel_cuda(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
+TORCH_API void foreach_tensor_mul_scalarlist_kernel_cuda_(at::TensorList self, at::ArrayRef<at::Scalar> scalars);
+TORCH_API void _foreach_mul_Tensor_out(at::TensorList self, const at::Tensor & other, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_tensor_kernel_slow(at::TensorList self, const at::Tensor & other);
+TORCH_API void foreach_tensor_mul_tensor_kernel_slow_(at::TensorList self, const at::Tensor & other);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_mul_tensor_kernel_cuda(at::TensorList self, const at::Tensor & other);
+TORCH_API void foreach_tensor_mul_tensor_kernel_cuda_(at::TensorList self, const at::Tensor & other);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_trunc_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API void _foreach_trunc_out(at::TensorList out, at::TensorList self);
+TORCH_API void _foreach_trunc_outf(at::TensorList self, at::TensorList out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_zero_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API ::std::vector<at::Tensor> _foreach_zero(at::TensorList self);
+TORCH_API void _foreach_zero_out(at::TensorList out, at::TensorList self);
+TORCH_API void _foreach_zero_outf(at::TensorList self, at::TensorList out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mkldnn_reshape.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_mkldnn_reshape_ops.h>
+namespace at {
+// aten::_mkldnn_reshape(Tensor self, int[] shape) -> Tensor
+inline at::Tensor _mkldnn_reshape(const at::Tensor & self, at::IntArrayRef shape) {
+    return at::_ops::_mkldnn_reshape::call(self, shape);
+}
+// aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mkldnn_reshape_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef shape) {
+    return at::_ops::_mkldnn_reshape_out::call(self, shape, out);
+}
+// aten::_mkldnn_reshape.out(Tensor self, int[] shape, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mkldnn_reshape_outf(const at::Tensor & self, at::IntArrayRef shape, at::Tensor & out) {
+    return at::_ops::_mkldnn_reshape_out::call(self, shape, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_get_values_native.h ADDED Viewed

	@@ -0,0 +1,20 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_print_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API void _print(c10::string_view s);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor_ops.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API _reshape_from_tensor {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_reshape_from_tensor")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & shape);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & shape);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=1, int64_t b=1);
+TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a, c10::string_view b);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & _test_functorch_fallback_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & _test_functorch_fallback_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & _to_sparse_bsr_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim=c10::nullopt);
+TORCH_API at::Tensor & _to_sparse_bsr_outf(const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unsafe_index_put.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_unsafe_index_put_ops.h>
+namespace at {
+// aten::_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
+inline at::Tensor _unsafe_index_put(const at::Tensor & self, const c10::List<c10::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false) {
+    return at::_ops::_unsafe_index_put::call(self, indices, values, accumulate);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h ADDED Viewed

	@@ -0,0 +1,27 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_upsample_nearest_exact1d_meta.h>
+namespace at {
+namespace native {
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
+struct TORCH_API structured__upsample_nearest_exact1d_out_cpu : public at::meta::structured__upsample_nearest_exact1d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales, const at::Tensor & out);
+};
+struct TORCH_API structured__upsample_nearest_exact1d_out_cuda : public at::meta::structured__upsample_nearest_exact1d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
+TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_version.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_version_ops.h>
+namespace at {
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _weight_norm_differentiable_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/add_meta.h ADDED Viewed

	@@ -0,0 +1,27 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace meta {
+struct TORCH_API structured_add_Tensor : public TensorIteratorBase {
+    void meta(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor arcsin(const at::Tensor & self);
+TORCH_API at::Tensor & arcsin_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & arcsin_(at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/asin_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor asin(const at::Tensor & self);
+TORCH_API at::Tensor & asin_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & asin_outf(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & asin_(at::Tensor & self);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan_native.h ADDED Viewed

	@@ -0,0 +1,29 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/atan_meta.h>
+namespace at {
+namespace native {
+struct TORCH_API structured_atan_out : public at::meta::structured_atan {
+void impl(const at::Tensor & self, const at::Tensor & out);
+};
+TORCH_API at::Tensor atan_sparse(const at::Tensor & self);
+TORCH_API at::Tensor & atan_sparse_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & atan_sparse_(at::Tensor & self);
+TORCH_API at::Tensor atan_sparse_csr(const at::Tensor & self);
+TORCH_API at::Tensor & atan_sparse_csr_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & atan_sparse_csr_(at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/batch_norm_backward_elemt_ops.h>
+namespace at {
+// aten::batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor
+inline at::Tensor batch_norm_backward_elemt(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count) {
+    return at::_ops::batch_norm_backward_elemt::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count);
+}
+// aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & batch_norm_backward_elemt_out(at::Tensor & out, const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count) {
+    return at::_ops::batch_norm_backward_elemt_out::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count, out);
+}
+// aten::batch_norm_backward_elemt.out(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & batch_norm_backward_elemt_outf(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count, at::Tensor & out) {
+    return at::_ops::batch_norm_backward_elemt_out::call(grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_elemt.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/batch_norm_elemt_ops.h>
+namespace at {
+// aten::batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor
+inline at::Tensor batch_norm_elemt(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps) {
+    return at::_ops::batch_norm_elemt::call(input, weight, bias, mean, invstd, eps);
+}
+// aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & batch_norm_elemt_out(at::Tensor & out, const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps) {
+    return at::_ops::batch_norm_elemt_out::call(input, weight, bias, mean, invstd, eps, out);
+}
+// aten::batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & batch_norm_elemt_outf(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const at::Tensor & mean, const at::Tensor & invstd, double eps, at::Tensor & out) {
+    return at::_ops::batch_norm_elemt_out::call(input, weight, bias, mean, invstd, eps, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_stats_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> batch_norm_stats_out(const at::Tensor & input, double eps, at::Tensor & out0, at::Tensor & out1);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> batch_norm_stats_cuda(const at::Tensor & input, double eps);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/binary_cross_entropy_backward_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API binary_cross_entropy_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::binary_cross_entropy_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction);
+};
+struct TORCH_API binary_cross_entropy_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::binary_cross_entropy_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const c10::optional<at::Tensor> & weight, int64_t reduction, at::Tensor & grad_input);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bmm_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/channel_shuffle.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/channel_shuffle_ops.h>
+namespace at {
+// aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor
+inline at::Tensor channel_shuffle(const at::Tensor & self, int64_t groups) {
+    return at::_ops::channel_shuffle::call(self, groups);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor channel_shuffle(const at::Tensor & self, int64_t groups) {
+    return at::_ops::channel_shuffle::call(self, groups);
+  }
+}
+// aten::channel_shuffle(Tensor self, SymInt groups) -> Tensor
+inline at::Tensor channel_shuffle_symint(const at::Tensor & self, c10::SymInt groups) {
+    return at::_ops::channel_shuffle::call(self, groups);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor channel_shuffle(const at::Tensor & self, c10::SymInt groups) {
+    return at::_ops::channel_shuffle::call(self, groups);
+  }
+}
+// aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, int64_t groups) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, int64_t groups) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+  }
+}
+// aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & channel_shuffle_outf(const at::Tensor & self, int64_t groups, at::Tensor & out) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & channel_shuffle_outf(const at::Tensor & self, int64_t groups, at::Tensor & out) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+  }
+}
+// aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & channel_shuffle_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymInt groups) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & channel_shuffle_out(at::Tensor & out, const at::Tensor & self, c10::SymInt groups) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+  }
+}
+// aten::channel_shuffle.out(Tensor self, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & channel_shuffle_symint_outf(const at::Tensor & self, c10::SymInt groups, at::Tensor & out) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & channel_shuffle_outf(const at::Tensor & self, c10::SymInt groups, at::Tensor & out) {
+    return at::_ops::channel_shuffle_out::call(self, groups, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/col2im_native.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor col2im_cpu(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride);
+TORCH_API at::Tensor & col2im_out_cpu(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out);
+TORCH_API at::Tensor col2im_cuda(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride);
+TORCH_API at::Tensor & col2im_out_cuda(const at::Tensor & self, at::IntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/crow_indices.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/crow_indices_ops.h>
+namespace at {
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/cudnn_convolution_transpose_ops.h>
+namespace at {
+// aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+inline at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32);
+  }
+}
+// aten::cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+inline at::Tensor cudnn_convolution_transpose_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
+  }
+}
+// aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
+  }
+}
+// aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, benchmark, deterministic, allow_tf32, out);
+  }
+}
+// aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & cudnn_convolution_transpose_symint_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & cudnn_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
+  }
+}
+// aten::cudnn_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & cudnn_convolution_transpose_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & cudnn_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out) {
+    return at::_ops::cudnn_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dist_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor dist(const at::Tensor & self, const at::Tensor & other, const at::Scalar & p=2);
+TORCH_API at::Tensor & dist_out(const at::Tensor & self, const at::Tensor & other, const at::Scalar & p, at::Tensor & out);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/embedding_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor embedding_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx=-1, bool scale_grad_by_freq=false, bool sparse=false);
+TORCH_API at::Tensor & embedding_out_symint(const at::Tensor & weight, const at::Tensor & indices, c10::SymInt padding_idx, bool scale_grad_by_freq, bool sparse, at::Tensor & out);
+TORCH_API at::Tensor NestedTensor_embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx=-1, bool scale_grad_by_freq=false, bool sparse=false);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/erfc_meta_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace meta {
+TORCH_API at::Tensor erfc(const at::Tensor & self);
+TORCH_API at::Tensor & erfc_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & erfc_outf(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & erfc_(at::Tensor & self);
+} // namespace meta
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_fft.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/fft_fft_ops.h>
+namespace at {
+// aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
+inline at::Tensor fft_fft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor fft_fft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm);
+  }
+}
+// aten::fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
+inline at::Tensor fft_fft_symint(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft::call(self, n, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor fft_fft(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft::call(self, n, dim, norm);
+  }
+}
+// aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+  }
+}
+// aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_fft_out::call(self, n.has_value() ? c10::make_optional(c10::SymInt(*n)) : c10::nullopt, dim, norm, out);
+  }
+}
+// aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_fft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_fft_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
+  }
+}
+// aten::fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_fft_symint_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_fft_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_fft_out::call(self, n, dim, norm, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/fractional_max_pool2d_backward_ops.h>
+namespace at {
+// aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) {
+    return at::_ops::fractional_max_pool2d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input);
+}
+// aten::fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input) {
+    return at::_ops::fractional_max_pool2d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input);
+}
+// aten::fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
+inline at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) {
+    return at::_ops::fractional_max_pool2d_backward::call(grad_output, self, kernel_size, output_size, indices);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gather_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor gather(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad=false);
+TORCH_API at::Tensor & gather_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad=false);
+TORCH_API at::Tensor & gather_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad, at::Tensor & out);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge.h ADDED Viewed

	@@ -0,0 +1,53 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/ge_ops.h>
+namespace at {
+// aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) {
+    return at::_ops::ge_Scalar_out::call(self, other, out);
+}
+// aten::ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & ge_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) {
+    return at::_ops::ge_Scalar_out::call(self, other, out);
+}
+// aten::ge.Scalar(Tensor self, Scalar other) -> Tensor
+inline at::Tensor ge(const at::Tensor & self, const at::Scalar & other) {
+    return at::_ops::ge_Scalar::call(self, other);
+}
+// aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::ge_Tensor_out::call(self, other, out);
+}
+// aten::ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & ge_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
+    return at::_ops::ge_Tensor_out::call(self, other, out);
+}
+// aten::ge.Tensor(Tensor self, Tensor other) -> Tensor
+inline at::Tensor ge(const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::ge_Tensor::call(self, other);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ge_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor ge(const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & ge_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out);
+TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor ge(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & ge_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & ge_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Tensor & other);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor gelu_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate="none");
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hsplit_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API hsplit_int {
+  using schema = ::std::vector<at::Tensor> (const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::hsplit")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "int")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]")
+  static ::std::vector<at::Tensor> call(const at::Tensor & self, int64_t sections);
+  static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t sections);
+};
+struct TORCH_API hsplit_array {
+  using schema = ::std::vector<at::Tensor> (const at::Tensor &, at::IntArrayRef);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::hsplit")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "array")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]")
+  static ::std::vector<at::Tensor> call(const at::Tensor & self, at::IntArrayRef indices);
+  static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef indices);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/indices.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/indices_ops.h>
+namespace at {
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/item_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Scalar item(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kl_div.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/kl_div_ops.h>
+namespace at {
+// aten::kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
+inline at::Tensor kl_div(const at::Tensor & self, const at::Tensor & target, int64_t reduction=at::Reduction::Mean, bool log_target=false) {
+    return at::_ops::kl_div::call(self, target, reduction, log_target);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/kthvalue_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> kthvalue_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t k, int64_t dim=-1, bool keepdim=false);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> kthvalue_outf(const at::Tensor & self, int64_t k, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lgamma_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor lgamma(const at::Tensor & self);
+TORCH_API at::Tensor & lgamma_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & lgamma_outf(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & lgamma_(at::Tensor & self);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/linalg_matmul_ops.h>
+namespace at {
+// aten::linalg_matmul(Tensor self, Tensor other) -> Tensor
+inline at::Tensor linalg_matmul(const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::linalg_matmul::call(self, other);
+}
+// aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::linalg_matmul_out::call(self, other, out);
+}
+// aten::linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
+    return at::_ops::linalg_matmul_out::call(self, other, out);
+}
+}