koichi12 commited on Feb 12, 2025

Commit

b5efcc5

verified ·

1 Parent(s): d99e6e4

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_assert_scalar.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foobar_cpu_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_erfc.h +44 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_lerp.h +63 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_sinh_native.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sgd.h +63 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fw_primal_copy_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_index_put_impl_cuda_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_log_softmax_native.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_make_per_channel_quantized_tensor.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_neg_view_copy_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_softmax_with_shape_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_strides_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_resize_output_meta_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_slow_conv2d_backward_compositeexplicitautograd_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_ops.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_stack.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_optional_intlist_cpu_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_parallel_materialize_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsc.h +34 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/absolute_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/alias_copy_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin.h +44 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/argmax_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/avg_pool3d_backward_cpu_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ccol_indices_copy_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/diag_embed_compositeexplicitautogradnonfunctional_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/digamma_compositeexplicitautogradnonfunctional_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/eq_cpu_dispatch.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/exp.h +44 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfft_compositeimplicitautograd_dispatch.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftfreq_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gcd_cpu_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/grid_sampler_2d_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gru_cell_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/index_add_ops.h +61 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/index_reduce_meta_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_distributed_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_inference_compositeimplicitautograd_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isclose_ops.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_lu_solve_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_norm.h +53 -0

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_assert_scalar.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_assert_scalar_ops.h>
+namespace at {
+// aten::_assert_scalar(Scalar self, str assert_msg) -> ()
+inline void _assert_scalar(const at::Scalar & self, c10::string_view assert_msg) {
+    return at::_ops::_assert_scalar::call(self, assert_msg);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foobar_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor _foobar(const at::Tensor & self, bool arg1=true, bool arg2=true, bool arg3=true);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_erfc.h ADDED Viewed

	@@ -0,0 +1,44 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_foreach_erfc_ops.h>
+namespace at {
+// aten::_foreach_erfc(Tensor[] self) -> Tensor[]
+inline ::std::vector<at::Tensor> _foreach_erfc(at::TensorList self) {
+    return at::_ops::_foreach_erfc::call(self);
+}
+// aten::_foreach_erfc_(Tensor(a!)[] self) -> ()
+inline void _foreach_erfc_(at::TensorList self) {
+    return at::_ops::_foreach_erfc_::call(self);
+}
+// aten::_foreach_erfc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()
+inline void _foreach_erfc_out(at::TensorList out, at::TensorList self) {
+    return at::_ops::_foreach_erfc_out::call(self, out);
+}
+// aten::_foreach_erfc.out(Tensor[] self, *, Tensor(a!)[] out) -> ()
+inline void _foreach_erfc_outf(at::TensorList self, at::TensorList out) {
+    return at::_ops::_foreach_erfc_out::call(self, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_lerp.h ADDED Viewed

	@@ -0,0 +1,63 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_foreach_lerp_ops.h>
+namespace at {
+// aten::_foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]
+inline ::std::vector<at::Tensor> _foreach_lerp(at::TensorList self, at::TensorList tensors1, at::TensorList weights) {
+    return at::_ops::_foreach_lerp_List::call(self, tensors1, weights);
+}
+// aten::_foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()
+inline void _foreach_lerp_(at::TensorList self, at::TensorList tensors1, at::TensorList weights) {
+    return at::_ops::_foreach_lerp__List::call(self, tensors1, weights);
+}
+// aten::_foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]
+inline ::std::vector<at::Tensor> _foreach_lerp(at::TensorList self, at::TensorList tensors1, const at::Scalar & weight) {
+    return at::_ops::_foreach_lerp_Scalar::call(self, tensors1, weight);
+}
+// aten::_foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()
+inline void _foreach_lerp_(at::TensorList self, at::TensorList tensors1, const at::Scalar & weight) {
+    return at::_ops::_foreach_lerp__Scalar::call(self, tensors1, weight);
+}
+// aten::_foreach_lerp.List_out(Tensor[] self, Tensor[] tensors1, Tensor[] weights, *, Tensor(a!)[] out) -> ()
+inline void _foreach_lerp_out(at::TensorList out, at::TensorList self, at::TensorList tensors1, at::TensorList weights) {
+    return at::_ops::_foreach_lerp_List_out::call(self, tensors1, weights, out);
+}
+// aten::_foreach_lerp.List_out(Tensor[] self, Tensor[] tensors1, Tensor[] weights, *, Tensor(a!)[] out) -> ()
+inline void _foreach_lerp_outf(at::TensorList self, at::TensorList tensors1, at::TensorList weights, at::TensorList out) {
+    return at::_ops::_foreach_lerp_List_out::call(self, tensors1, weights, out);
+}
+// aten::_foreach_lerp.Scalar_out(Tensor[] self, Tensor[] tensors1, Scalar weight, *, Tensor(a!)[] out) -> ()
+inline void _foreach_lerp_out(at::TensorList out, at::TensorList self, at::TensorList tensors1, const at::Scalar & weight) {
+    return at::_ops::_foreach_lerp_Scalar_out::call(self, tensors1, weight, out);
+}
+// aten::_foreach_lerp.Scalar_out(Tensor[] self, Tensor[] tensors1, Scalar weight, *, Tensor(a!)[] out) -> ()
+inline void _foreach_lerp_outf(at::TensorList self, at::TensorList tensors1, const at::Scalar & weight, at::TensorList out) {
+    return at::_ops::_foreach_lerp_Scalar_out::call(self, tensors1, weight, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_sinh_native.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API void _foreach_sinh_out(at::TensorList self, at::TensorList out);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_sinh_slow(at::TensorList self);
+TORCH_API void foreach_tensor_sinh_slow_(at::TensorList self);
+TORCH_API ::std::vector<at::Tensor> foreach_tensor_sinh_cuda(at::TensorList self);
+TORCH_API void foreach_tensor_sinh_cuda_(at::TensorList self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sgd.h ADDED Viewed

	@@ -0,0 +1,63 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_fused_sgd_ops.h>
+namespace at {
+// aten::_fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+inline void _fused_sgd_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd_::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf);
+}
+// aten::_fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+inline void _fused_sgd_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd__tensor_lr::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf);
+}
+// aten::_fused_sgd.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()
+inline void _fused_sgd_out(at::TensorList out, at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd_out::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf, out);
+}
+// aten::_fused_sgd.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()
+inline void _fused_sgd_outf(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out) {
+    return at::_ops::_fused_sgd_out::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf, out);
+}
+// aten::_fused_sgd(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)
+inline ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf);
+}
+// aten::_fused_sgd.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()
+inline void _fused_sgd_out(at::TensorList out, at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd_tensor_lr_out::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf, out);
+}
+// aten::_fused_sgd.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()
+inline void _fused_sgd_outf(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out) {
+    return at::_ops::_fused_sgd_tensor_lr_out::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf, out);
+}
+// aten::_fused_sgd.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] momentum_buffer_list_out)
+inline ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={}) {
+    return at::_ops::_fused_sgd_tensor_lr::call(self, grads, momentum_buffer_list, weight_decay, momentum, lr, dampening, nesterov, maximize, is_first_step, grad_scale, found_inf);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fw_primal_copy_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API _fw_primal_copy {
+  using schema = at::Tensor (const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fw_primal_copy")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fw_primal_copy(Tensor self, int level) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, int64_t level);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t level);
+};
+struct TORCH_API _fw_primal_copy_out {
+  using schema = at::Tensor & (const at::Tensor &, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fw_primal_copy")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fw_primal_copy.out(Tensor self, int level, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, int64_t level, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t level, at::Tensor & out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_index_put_impl_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor & _index_put_impl_(at::Tensor & self, const c10::List<c10::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_log_softmax_native.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_log_softmax_meta.h>
+namespace at {
+namespace native {
+struct TORCH_API structured_log_softmax_cpu_out : public at::meta::structured__log_softmax {
+void impl(const at::Tensor & self, int64_t dim, bool half_to_float, const at::Tensor & out);
+};
+struct TORCH_API structured_log_softmax_cuda_out : public at::meta::structured__log_softmax {
+void impl(const at::Tensor & self, int64_t dim, bool half_to_float, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_make_per_channel_quantized_tensor.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_make_per_channel_quantized_tensor_ops.h>
+namespace at {
+// aten::_make_per_channel_quantized_tensor(Tensor self, Tensor scale, Tensor zero_point, int axis) -> Tensor
+inline at::Tensor _make_per_channel_quantized_tensor(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis) {
+    return at::_ops::_make_per_channel_quantized_tensor::call(self, scale, zero_point, axis);
+}
+// aten::_make_per_channel_quantized_tensor.out(Tensor self, Tensor scale, Tensor zero_point, int axis, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _make_per_channel_quantized_tensor_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis) {
+    return at::_ops::_make_per_channel_quantized_tensor_out::call(self, scale, zero_point, axis, out);
+}
+// aten::_make_per_channel_quantized_tensor.out(Tensor self, Tensor scale, Tensor zero_point, int axis, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _make_per_channel_quantized_tensor_outf(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, at::Tensor & out) {
+    return at::_ops::_make_per_channel_quantized_tensor_out::call(self, scale, zero_point, axis, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_neg_view_copy_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & _neg_view_copy_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & _neg_view_copy_outf(const at::Tensor & self, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_softmax_with_shape_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor NestedTensor_softmax_dropout(const at::Tensor & self, const at::Tensor & query);
+TORCH_API at::Tensor NestedTensor_softmax_dropout_cuda(const at::Tensor & self, const at::Tensor & query);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_strides_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & _nested_tensor_strides_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & _nested_tensor_strides_outf(const at::Tensor & self, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_from_tensor.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_reshape_from_tensor_ops.h>
+namespace at {
+// aten::_reshape_from_tensor(Tensor self, Tensor shape) -> Tensor
+inline at::Tensor _reshape_from_tensor(const at::Tensor & self, const at::Tensor & shape) {
+    return at::_ops::_reshape_from_tensor::call(self, shape);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_resize_output_meta_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace meta {
+TORCH_API const at::Tensor & _resize_output_(const at::Tensor & self, at::IntArrayRef size, at::Device device);
+TORCH_API const at::Tensor & _resize_output__symint(const at::Tensor & self, c10::SymIntArrayRef size, at::Device device);
+} // namespace meta
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_slow_conv2d_backward_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _slow_conv2d_backward_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, ::std::array<bool,3> output_mask);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _slow_conv2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, ::std::array<bool,3> output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _slow_conv2d_backward_symint_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _slow_conv2d_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_ops.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API _sparse_broadcast_to {
+  using schema = at::Tensor (const at::Tensor &, at::IntArrayRef);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_sparse_broadcast_to")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)")
+  static at::Tensor call(const at::Tensor & self, at::IntArrayRef size);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef size);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self);
+TORCH_API at::Tensor & _sparse_log_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, c10::optional<at::ScalarType> dtype=c10::nullopt);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_stack.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_stack_ops.h>
+namespace at {
+// aten::_stack(Tensor[] tensors, int dim=0) -> Tensor
+inline at::Tensor _stack(at::TensorList tensors, int64_t dim=0) {
+    return at::_ops::_stack::call(tensors, dim);
+}
+// aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _stack_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0) {
+    return at::_ops::_stack_out::call(tensors, dim, out);
+}
+// aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _stack_outf(at::TensorList tensors, int64_t dim, at::Tensor & out) {
+    return at::_ops::_stack_out::call(tensors, dim, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_optional_intlist_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor _test_optional_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_test_parallel_materialize_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor _test_parallel_materialize(const at::Tensor & self, int64_t num_parallel, bool skip_first=false);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _thnn_fused_lstm_cell_out(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const c10::optional<at::Tensor> & input_bias, const c10::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _thnn_fused_lstm_cell_cuda(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const c10::optional<at::Tensor> & input_bias={}, const c10::optional<at::Tensor> & hidden_bias={});
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_bsc.h ADDED Viewed

	@@ -0,0 +1,34 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_to_sparse_bsc_ops.h>
+namespace at {
+// aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _to_sparse_bsc_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim=c10::nullopt) {
+    return at::_ops::_to_sparse_bsc_out::call(self, blocksize, dense_dim, out);
+}
+// aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _to_sparse_bsc_outf(const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim, at::Tensor & out) {
+    return at::_ops::_to_sparse_bsc_out::call(self, blocksize, dense_dim, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_upsample_nearest_exact2d_backward_ops.h>
+namespace at {
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+  }
+}
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+  }
+}
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+  }
+}
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+  }
+}
+// aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w);
+  }
+}
+// aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor _weight_int4pack_mm(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/absolute_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor absolute(const at::Tensor & self);
+TORCH_API at::Tensor & absolute_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & absolute_(at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/alias_copy_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor alias_copy(const at::Tensor & self);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arcsin.h ADDED Viewed

	@@ -0,0 +1,44 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/arcsin_ops.h>
+namespace at {
+// aten::arcsin(Tensor self) -> Tensor
+inline at::Tensor arcsin(const at::Tensor & self) {
+    return at::_ops::arcsin::call(self);
+}
+// aten::arcsin_(Tensor(a!) self) -> Tensor(a!)
+inline at::Tensor & arcsin_(at::Tensor & self) {
+    return at::_ops::arcsin_::call(self);
+}
+// aten::arcsin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & arcsin_out(at::Tensor & out, const at::Tensor & self) {
+    return at::_ops::arcsin_out::call(self, out);
+}
+// aten::arcsin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & arcsin_outf(const at::Tensor & self, at::Tensor & out) {
+    return at::_ops::arcsin_out::call(self, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/argmax_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor argmax(const at::Tensor & self, c10::optional<int64_t> dim=c10::nullopt, bool keepdim=false);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/avg_pool3d_backward_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor avg_pool3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional<int64_t> divisor_override);
+TORCH_API at::Tensor & avg_pool3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional<int64_t> divisor_override);
+TORCH_API at::Tensor & avg_pool3d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional<int64_t> divisor_override, at::Tensor & grad_input);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ccol_indices_copy_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor & ccol_indices_copy_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor ccol_indices_copy(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/diag_embed_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor diag_embed(const at::Tensor & self, int64_t offset=0, int64_t dim1=-2, int64_t dim2=-1);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/digamma_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor digamma(const at::Tensor & self);
+TORCH_API at::Tensor & digamma_(at::Tensor & self);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/eq_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor eq(const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & eq_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor & eq_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out);
+TORCH_API at::Tensor & eq_(at::Tensor & self, const at::Scalar & other);
+TORCH_API at::Tensor eq(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & eq_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & eq_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+TORCH_API at::Tensor & eq_(at::Tensor & self, const at::Tensor & other);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/exp.h ADDED Viewed

	@@ -0,0 +1,44 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/exp_ops.h>
+namespace at {
+// aten::exp(Tensor self) -> Tensor
+inline at::Tensor exp(const at::Tensor & self) {
+    return at::_ops::exp::call(self);
+}
+// aten::exp_(Tensor(a!) self) -> Tensor(a!)
+inline at::Tensor & exp_(at::Tensor & self) {
+    return at::_ops::exp_::call(self);
+}
+// aten::exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & exp_out(at::Tensor & out, const at::Tensor & self) {
+    return at::_ops::exp_out::call(self, out);
+}
+// aten::exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & exp_outf(const at::Tensor & self, at::Tensor & out) {
+    return at::_ops::exp_out::call(self, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfft_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API at::Tensor fft_rfft(const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor fft_rfft_symint(const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor & fft_rfft_out(at::Tensor & out, const at::Tensor & self, c10::optional<int64_t> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor & fft_rfft_outf(const at::Tensor & self, c10::optional<int64_t> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out);
+TORCH_API at::Tensor & fft_rfft_symint_out(at::Tensor & out, const at::Tensor & self, c10::optional<c10::SymInt> n=c10::nullopt, int64_t dim=-1, c10::optional<c10::string_view> norm=c10::nullopt);
+TORCH_API at::Tensor & fft_rfft_symint_outf(const at::Tensor & self, c10::optional<c10::SymInt> n, int64_t dim, c10::optional<c10::string_view> norm, at::Tensor & out);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_rfftfreq_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API fft_rfftfreq {
+  using schema = at::Tensor (int64_t, double, c10::optional<at::ScalarType>, c10::optional<at::Layout>, c10::optional<at::Device>, c10::optional<bool>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::fft_rfftfreq")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "fft_rfftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor")
+  static at::Tensor call(int64_t n, double d, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, int64_t n, double d, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory);
+};
+struct TORCH_API fft_rfftfreq_out {
+  using schema = at::Tensor & (int64_t, double, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::fft_rfftfreq")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "fft_rfftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(int64_t n, double d, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, int64_t n, double d, at::Tensor & out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices);
+TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices);
+TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gcd_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor gcd(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & gcd_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & gcd_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+TORCH_API at::Tensor & gcd_(at::Tensor & self, const at::Tensor & other);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/grid_sampler_2d_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API grid_sampler_2d {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, int64_t, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::grid_sampler_2d")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor")
+  static at::Tensor call(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners);
+};
+struct TORCH_API grid_sampler_2d_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, int64_t, bool, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::grid_sampler_2d")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "grid_sampler_2d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gru_cell_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor gru_cell(const at::Tensor & input, const at::Tensor & hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const c10::optional<at::Tensor> & b_ih={}, const c10::optional<at::Tensor> & b_hh={});
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/index_add_ops.h ADDED Viewed

	@@ -0,0 +1,61 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API index_add_out {
+  using schema = at::Tensor & (const at::Tensor &, int64_t, const at::Tensor &, const at::Tensor &, const at::Scalar &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::index_add")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha, at::Tensor & out);
+};
+struct TORCH_API index_add_ {
+  using schema = at::Tensor & (at::Tensor &, int64_t, const at::Tensor &, const at::Tensor &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::index_add_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)")
+  static at::Tensor & call(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+};
+struct TORCH_API index_add {
+  using schema = at::Tensor (const at::Tensor &, int64_t, const at::Tensor &, const at::Tensor &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::index_add")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+};
+struct TORCH_API index_add_dimname {
+  using schema = at::Tensor (const at::Tensor &, at::Dimname, const at::Tensor &, const at::Tensor &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::index_add")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "dimname")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/index_reduce_meta_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace meta {
+TORCH_API at::Tensor index_reduce(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true);
+TORCH_API at::Tensor & index_reduce_out(at::Tensor & out, const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true);
+TORCH_API at::Tensor & index_reduce_outf(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self, at::Tensor & out);
+TORCH_API at::Tensor & index_reduce_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true);
+} // namespace meta
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API bool is_conj(const at::Tensor & self);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_distributed_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API bool is_distributed(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_inference_compositeimplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeimplicitautograd {
+TORCH_API bool is_inference(const at::Tensor & self);
+} // namespace compositeimplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isclose_ops.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API isclose {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, double, double, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::isclose")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_lu_solve_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/linalg_lu_solve_meta.h>
+namespace at {
+namespace native {
+struct TORCH_API structured_linalg_lu_solve_out : public at::meta::structured_linalg_lu_solve {
+void impl(const at::Tensor & LU, const at::Tensor & pivots, const at::Tensor & B, bool left, bool adjoint, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_norm.h ADDED Viewed

	@@ -0,0 +1,53 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/linalg_norm_ops.h>
+namespace at {
+// aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+inline at::Tensor linalg_norm(const at::Tensor & self, const c10::optional<at::Scalar> & ord=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt) {
+    return at::_ops::linalg_norm::call(self, ord, dim, keepdim, dtype);
+}
+// aten::linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+inline at::Tensor linalg_norm(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt) {
+    return at::_ops::linalg_norm_ord_str::call(self, ord, dim, keepdim, dtype);
+}
+// aten::linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_norm_out(at::Tensor & out, const at::Tensor & self, const c10::optional<at::Scalar> & ord=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt) {
+    return at::_ops::linalg_norm_out::call(self, ord, dim, keepdim, dtype, out);
+}
+// aten::linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_norm_outf(const at::Tensor & self, const c10::optional<at::Scalar> & ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype, at::Tensor & out) {
+    return at::_ops::linalg_norm_out::call(self, ord, dim, keepdim, dtype, out);
+}
+// aten::linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_norm_out(at::Tensor & out, const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt) {
+    return at::_ops::linalg_norm_ord_str_out::call(self, ord, dim, keepdim, dtype, out);
+}
+// aten::linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & linalg_norm_outf(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype, at::Tensor & out) {
+    return at::_ops::linalg_norm_ord_str_out::call(self, ord, dim, keepdim, dtype, out);
+}
+}