diff --git a/.gitattributes b/.gitattributes
index b076c7e468edac29543e48ad0e4021d0b441fca6..6a190f740e1fc1e4bde8b104975ca9922ea2b8c0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -80,3 +80,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/t64-arm.exe filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib/libcudart.so.11.0 filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cudagraph_trees.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/parallel/__pycache__/distributed.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_convert_indices_from_coo_to_csr_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_convert_indices_from_coo_to_csr_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..c3ffa29b477bd519476cd5813b4ccfce405dc19c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_convert_indices_from_coo_to_csr_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _convert_indices_from_coo_to_csr {
+  using schema = at::Tensor (const at::Tensor &, int64_t, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_convert_indices_from_coo_to_csr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, int64_t size, bool out_int32);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t size, bool out_int32);
+};
+
+struct TORCH_API _convert_indices_from_coo_to_csr_out {
+  using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_convert_indices_from_coo_to_csr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_convert_indices_from_coo_to_csr.out(Tensor self, int size, *, bool out_int32=False, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, int64_t size, bool out_int32, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t size, bool out_int32, at::Tensor & out);
+};
+
+}} // namespace at::_ops
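The `at::_ops` structs above expose two entry points: `call`, which runs the full dispatcher, and `redispatch`, which re-enters dispatch below a given key set; the public `at::` wrapper is a thin forwarder to `call`. A minimal sketch of invoking this schema directly, assuming a program linked against libtorch (the index values are illustrative):

```cpp
// Sketch only: at::_ops::<op>::call is what the public at:: wrapper forwards to.
#include <ATen/ATen.h>
#include <iostream>

int main() {
  // COO row indices of a 3x4 sparse matrix, matching the schema string
  // "_convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor"
  at::Tensor coo_rows = at::tensor({0, 0, 1, 2}, at::kLong);
  at::Tensor crow = at::_ops::_convert_indices_from_coo_to_csr::call(
      coo_rows, /*size=*/3, /*out_int32=*/false);
  std::cout << crow << "\n";  // compressed row pointers, length size+1: [0, 2, 3, 4]
  return 0;
}
```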
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_ctc_loss_meta_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_ctc_loss_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..2682f353aad5047ffd6b9bbb38af36dad7b2a5c9
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_ctc_loss_meta_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank=0, bool zero_infinity=false);
+
+} // namespace meta
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_backward_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..388b8cb303415041b671f78022462b9458e98921
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_backward_cuda_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,::std::vector<at::Tensor>> _cudnn_rnn_backward(const at::Tensor & input, at::TensorList weight, int64_t weight_stride0, const at::Tensor & weight_buf, const at::Tensor & hx, const c10::optional<at::Tensor> & cx, const at::Tensor & output, const c10::optional<at::Tensor> & grad_output, const c10::optional<at::Tensor> & grad_hy, const c10::optional<at::Tensor> & grad_cy, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, at::IntArrayRef batch_sizes, const c10::optional<at::Tensor> & dropout_state, const at::Tensor & reserve, ::std::array<bool,4> output_mask);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,::std::vector<at::Tensor>> _cudnn_rnn_backward_symint(const at::Tensor & input, at::TensorList weight, int64_t weight_stride0, const at::Tensor & weight_buf, const at::Tensor & hx, const c10::optional<at::Tensor> & cx, const at::Tensor & output, const c10::optional<at::Tensor> & grad_output, const c10::optional<at::Tensor> & grad_hy, const c10::optional<at::Tensor> & grad_cy, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, c10::SymIntArrayRef batch_sizes, const c10::optional<at::Tensor> & dropout_state, const at::Tensor & reserve, ::std::array<bool,4> output_mask);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f16336dc11433747431aa75334eb7605690ca20e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_debug_has_internal_overlap_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API int64_t _debug_has_internal_overlap(const at::Tensor & self);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..545a13070ca8561ddbbe1cf9e927ed5f8cac541d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _fake_quantize_learnable_per_channel_affine_out(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, at::Tensor & out);
+TORCH_API at::Tensor _fake_quantize_learnable_per_channel_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor=1.0);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_ceil.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_ceil.h
new file mode 100644
index 0000000000000000000000000000000000000000..141968c355806bfa45dbfc7beabd1657d4f756ca
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_ceil.h
@@ -0,0 +1,44 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_foreach_ceil_ops.h>
+
+namespace at {
+
+
+// aten::_foreach_ceil(Tensor[] self) -> Tensor[]
+inline ::std::vector<at::Tensor> _foreach_ceil(at::TensorList self) {
+    return at::_ops::_foreach_ceil::call(self);
+}
+
+// aten::_foreach_ceil_(Tensor(a!)[] self) -> ()
+inline void _foreach_ceil_(at::TensorList self) {
+    return at::_ops::_foreach_ceil_::call(self);
+}
+
+// aten::_foreach_ceil.out(Tensor[] self, *, Tensor(a!)[] out) -> ()
+inline void _foreach_ceil_out(at::TensorList out, at::TensorList self) {
+    return at::_ops::_foreach_ceil_out::call(self, out);
+}
+// aten::_foreach_ceil.out(Tensor[] self, *, Tensor(a!)[] out) -> ()
+inline void _foreach_ceil_outf(at::TensorList self, at::TensorList out) {
+    return at::_ops::_foreach_ceil_out::call(self, out);
+}
+
+}
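A minimal sketch of the three `_foreach_ceil` variants declared above, assuming libtorch (`foreach_ceil_demo` is a hypothetical helper name; tensor shapes are illustrative):

```cpp
#include <ATen/ATen.h>
#include <vector>

void foreach_ceil_demo() {
  std::vector<at::Tensor> xs = {at::rand({2, 2}), at::rand({3})};

  // Functional form: returns new tensors ("_foreach_ceil(Tensor[] self) -> Tensor[]").
  std::vector<at::Tensor> ys = at::_foreach_ceil(xs);

  // In-place form: mutates every tensor in the list ("_foreach_ceil_(Tensor(a!)[] self) -> ()").
  at::_foreach_ceil_(xs);

  // Out variant: writes into preallocated outputs. Note `out` comes first in
  // the _out overload and last in _outf, mirroring the declarations above.
  std::vector<at::Tensor> outs = {at::empty({2, 2}), at::empty({3})};
  at::_foreach_ceil_out(outs, xs);
}
```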
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_pow_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_pow_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f85c7d11c55a65cccaf4da98a4e7b102f1aaaa4c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_pow_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API void _foreach_pow_out(at::TensorList out, at::TensorList self, at::TensorList exponent);
+TORCH_API void _foreach_pow_outf(at::TensorList self, at::TensorList exponent, at::TensorList out);
+TORCH_API void _foreach_pow_out(at::TensorList out, at::TensorList self, const at::Scalar & exponent);
+TORCH_API void _foreach_pow_outf(at::TensorList self, const at::Scalar & exponent, at::TensorList out);
+TORCH_API void _foreach_pow_out(at::TensorList out, at::TensorList self, at::ArrayRef<at::Scalar> exponent);
+TORCH_API void _foreach_pow_outf(at::TensorList self, at::ArrayRef<at::Scalar> exponent, at::TensorList out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf58972aaf2b57587e9387a1cfdbb0425ddf304f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_cuda_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API void _fused_adamw_(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={});
+TORCH_API void _fused_adamw_(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={});
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..9026efb35a303ce3d98c194a062cfdf9f10ea84d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_adamw_ops.h
@@ -0,0 +1,83 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _fused_adamw_ {
+  using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()")
+  static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+};
+
+struct TORCH_API _fused_adamw__tensor_lr {
+  using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "tensor_lr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()")
+  static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+};
+
+struct TORCH_API _fused_adamw_out {
+  using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &, at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw.out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()")
+  static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out);
+};
+
+struct TORCH_API _fused_adamw {
+  using schema = ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, double, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)")
+  static ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+  static ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, double lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+};
+
+struct TORCH_API _fused_adamw_tensor_lr_out {
+  using schema = void (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &, at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "tensor_lr_out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw.tensor_lr_out(Tensor[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None, Tensor(a!)[] out) -> ()")
+  static void call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out);
+  static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf, at::TensorList out);
+};
+
+struct TORCH_API _fused_adamw_tensor_lr {
+  using schema = ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> (at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, at::TensorList, const at::Tensor &, double, double, double, double, bool, bool, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_adamw")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "tensor_lr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_adamw.tensor_lr(Tensor[] self, Tensor[] grads, Tensor[] exp_avgs, Tensor[] exp_avg_sqs, Tensor[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> (Tensor[] self_out, Tensor[] grads_out, Tensor[] exp_avgs_out, Tensor[] exp_avg_sqs_out, Tensor[] max_exp_avg_sqs_out)")
+  static ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> call(at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+  static ::std::tuple<::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>,::std::vector<at::Tensor>> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList grads, at::TensorList exp_avgs, at::TensorList exp_avg_sqs, at::TensorList max_exp_avg_sqs, at::TensorList state_steps, const at::Tensor & lr, double beta1, double beta2, double weight_decay, double eps, bool amsgrad, bool maximize, const c10::optional<at::Tensor> & grad_scale, const c10::optional<at::Tensor> & found_inf);
+};
+
+}} // namespace at::_ops
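The `_fused_adamw_` overloads above implement the fused single-kernel AdamW step that backs the `fused=True` path of `torch.optim.AdamW`; they mutate the parameter and optimizer-state lists in place. A hedged sketch of one step, assuming a CUDA libtorch build (`fused_adamw_step` is a hypothetical helper name and the hyperparameters are illustrative):

```cpp
#include <ATen/ATen.h>
#include <vector>

// Sketch: params and grads are assumed to be CUDA tensors of matching shapes.
void fused_adamw_step(std::vector<at::Tensor>& params,
                      std::vector<at::Tensor>& grads) {
  std::vector<at::Tensor> exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps;
  for (const auto& p : params) {
    exp_avgs.push_back(at::zeros_like(p));
    exp_avg_sqs.push_back(at::zeros_like(p));
    max_exp_avg_sqs.push_back(at::zeros_like(p));  // only read when amsgrad=true
    // Per-parameter step counter; assumed a float scalar on the same device.
    state_steps.push_back(at::zeros({}, p.options().dtype(at::kFloat)));
  }
  // One in-place optimizer step; grad_scale and found_inf keep their None defaults.
  at::_fused_adamw_(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs,
                    state_steps,
                    /*lr=*/1e-3, /*beta1=*/0.9, /*beta2=*/0.999,
                    /*weight_decay=*/1e-2, /*eps=*/1e-8,
                    /*amsgrad=*/false, /*maximize=*/false);
}
```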
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sdp_choice_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sdp_choice_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..15f6c5a2e725286226b4b5f0c5c81d517c3a54f5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sdp_choice_ops.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _fused_sdp_choice {
+  using schema = int64_t (const at::Tensor &, const at::Tensor &, const at::Tensor &, const c10::optional<at::Tensor> &, double, bool, c10::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_fused_sdp_choice")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int")
+  static int64_t call(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal, c10::optional<double> scale);
+  static int64_t redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask, double dropout_p, bool is_causal, c10::optional<double> scale);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sgd_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sgd_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f97cbfffa2f7820a16c2da4a1f54e939314a3a41
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sgd_cuda_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API void _fused_sgd_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={});
+TORCH_API void _fused_sgd_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const c10::optional<at::Tensor> & grad_scale={}, const c10::optional<at::Tensor> & found_inf={});
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..c93da9b82d72ec728aa031e4985b6a6231c30475
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_native_multi_head_attention_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask={}, bool need_weights=true, bool average_attn_weights=true, c10::optional<int64_t> mask_type=c10::nullopt);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _native_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional<at::Tensor> & mask, bool need_weights, bool average_attn_weights, c10::optional<int64_t> mask_type, at::Tensor & out0, at::Tensor & out1);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned.h
new file mode 100644
index 0000000000000000000000000000000000000000..5da62e1101ad68fd8d9d2c0156ec914dd6327175
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_nested_tensor_from_mask_left_aligned_ops.h>
+
+namespace at {
+
+
+// aten::_nested_tensor_from_mask_left_aligned(Tensor t, Tensor mask) -> bool
+inline bool _nested_tensor_from_mask_left_aligned(const at::Tensor & t, const at::Tensor & mask) {
+    return at::_ops::_nested_tensor_from_mask_left_aligned::call(t, mask);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c364fd8e61f6676e13f2a66da4f5d405d89d1e3
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_pin_memory_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _pin_memory_out(at::Tensor & out, const at::Tensor & self, c10::optional<at::Device> device=c10::nullopt);
+TORCH_API at::Tensor & _pin_memory_outf(const at::Tensor & self, c10::optional<at::Device> device, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_alias_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_alias_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..38b42566f200171fb1d9c9cc225700723bcab90f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_reshape_alias_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _reshape_alias(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_segment_reduce_backward.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_segment_reduce_backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..394ad07461ee9e961ac791b0537d58a33589f1b0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_segment_reduce_backward.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/_segment_reduce_backward_ops.h>
+
+namespace at {
+
+
+// aten::_segment_reduce_backward(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None) -> Tensor
+inline at::Tensor _segment_reduce_backward(const at::Tensor & grad, const at::Tensor & output, const at::Tensor & data, c10::string_view reduce, const c10::optional<at::Tensor> & lengths={}, const c10::optional<at::Tensor> & offsets={}, int64_t axis=0, const c10::optional<at::Scalar> & initial=c10::nullopt) {
+    return at::_ops::_segment_reduce_backward::call(grad, output, data, reduce, lengths, offsets, axis, initial);
+}
+
+// aten::_segment_reduce_backward.out(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _segment_reduce_backward_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & output, const at::Tensor & data, c10::string_view reduce, const c10::optional<at::Tensor> & lengths={}, const c10::optional<at::Tensor> & offsets={}, int64_t axis=0, const c10::optional<at::Scalar> & initial=c10::nullopt) {
+    return at::_ops::_segment_reduce_backward_out::call(grad, output, data, reduce, lengths, offsets, axis, initial, out);
+}
+// aten::_segment_reduce_backward.out(Tensor grad, Tensor output, Tensor data, str reduce, *, Tensor? lengths=None, Tensor? offsets=None, int axis=0, Scalar? initial=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _segment_reduce_backward_outf(const at::Tensor & grad, const at::Tensor & output, const at::Tensor & data, c10::string_view reduce, const c10::optional<at::Tensor> & lengths, const c10::optional<at::Tensor> & offsets, int64_t axis, const c10::optional<at::Scalar> & initial, at::Tensor & out) {
+    return at::_ops::_segment_reduce_backward_out::call(grad, output, data, reduce, lengths, offsets, axis, initial, out);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..156e618d581416743636150a9d44c665f20ccaea
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
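The `_softmax_out`/`_softmax_outf` pair above illustrates a convention that recurs throughout these headers: both write into a caller-provided `out`, with the `_out` overload taking `out` first (allowing defaulted trailing arguments) and the `_outf` overload taking every argument explicitly with `out` last. A minimal sketch, assuming a CUDA-enabled libtorch build (shapes are illustrative):

```cpp
#include <ATen/ATen.h>

void softmax_out_demo() {
  at::Tensor x = at::rand({4, 8}, at::kCUDA);
  at::Tensor out = at::empty_like(x);
  // Both calls compute the same row-wise softmax into `out`.
  at::_softmax_out(out, x, /*dim=*/1, /*half_to_float=*/false);
  at::_softmax_outf(x, /*dim=*/1, /*half_to_float=*/false, out);
}
```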
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..ada52eef057dfebe7bf8e6c4491d8f1fad7236cb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_sparse_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & _sparse_sparse_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..a6f93df60e7b7ef122435b3371ee6fd3032777ce
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _to_sparse_csr {
+  using schema = at::Tensor (const at::Tensor &, c10::optional<int64_t>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_to_sparse_csr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, c10::optional<int64_t> dense_dim);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::optional<int64_t> dense_dim);
+};
+
+struct TORCH_API _to_sparse_csr_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::optional<int64_t>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::_to_sparse_csr")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, c10::optional<int64_t> dense_dim, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::optional<int64_t> dense_dim, at::Tensor & out);
+};
+
+}} // namespace at::_ops
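`_to_sparse_csr` is the private op behind the public `Tensor::to_sparse_csr` conversion. A minimal sketch, assuming libtorch (the matrix is illustrative):

```cpp
#include <ATen/ATen.h>

void to_csr_demo() {
  at::Tensor dense = at::eye(3);
  // Public entry point; per the schema above, dense_dim defaults to None.
  at::Tensor csr = dense.to_sparse_csr();
  at::Tensor crow = csr.crow_indices();  // compressed row pointers, length rows+1
  at::Tensor col = csr.col_indices();
  at::Tensor vals = csr.values();
}
```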
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_values_copy_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_values_copy_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..1c767451407e14660b043ffc35aa325ea3a60cee
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_values_copy_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _values_copy_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor _values_copy(const at::Tensor & self);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/addcmul_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/addcmul_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..29d73942a462dc5fff9c7793e87231f0b81742e2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/addcmul_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor addcmul(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1);
+TORCH_API at::Tensor & addcmul_(at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_to_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_to_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..06bddcb5e223516726dd75a63d716714fcb17a5e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/align_to_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor align_to(const at::Tensor & self, at::DimnameList names);
+TORCH_API at::Tensor align_to(const at::Tensor & self, at::DimnameList order, int64_t ellipsis_idx);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_gather_stats.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_gather_stats.h
new file mode 100644
index 0000000000000000000000000000000000000000..71eba6a0f43cc6aac4fb4bd64a4ac7b162a221d6
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_gather_stats.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/batch_norm_gather_stats_ops.h>
+
+namespace at {
+
+
+// aten::batch_norm_gather_stats(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> batch_norm_gather_stats(const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, double momentum, double eps, int64_t count) {
+    return at::_ops::batch_norm_gather_stats::call(input, mean, invstd, running_mean, running_var, momentum, eps, count);
+}
+
+// aten::batch_norm_gather_stats.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> batch_norm_gather_stats_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, double momentum, double eps, int64_t count) {
+    return at::_ops::batch_norm_gather_stats_out::call(input, mean, invstd, running_mean, running_var, momentum, eps, count, out0, out1);
+}
+// aten::batch_norm_gather_stats.out(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> batch_norm_gather_stats_outf(const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, double momentum, double eps, int64_t count, at::Tensor & out0, at::Tensor & out1) {
+    return at::_ops::batch_norm_gather_stats_out::call(input, mean, invstd, running_mean, running_var, momentum, eps, count, out0, out1);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bincount_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bincount_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..28f7c0c33e7bef7d2c88687fcd10f7664ad3a3a8
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bincount_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API bincount {
+  using schema = at::Tensor (const at::Tensor &, const c10::optional<at::Tensor> &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::bincount")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const c10::optional<at::Tensor> & weights, int64_t minlength);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::optional<at::Tensor> & weights, int64_t minlength);
+};
+
+struct TORCH_API bincount_out {
+  using schema = at::Tensor & (const at::Tensor &, const c10::optional<at::Tensor> &, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::bincount")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "bincount.out(Tensor self, Tensor? weights=None, int minlength=0, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, const c10::optional<at::Tensor> & weights, int64_t minlength, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::optional<at::Tensor> & weights, int64_t minlength, at::Tensor & out);
+};
+
+}} // namespace at::_ops
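A minimal usage sketch for the `bincount` schema above, assuming libtorch (the label values are illustrative):

```cpp
#include <ATen/ATen.h>

void bincount_demo() {
  at::Tensor labels = at::tensor({0, 1, 1, 3}, at::kLong);
  // "bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor"
  at::Tensor counts = at::bincount(labels);                       // -> [1, 2, 0, 1]
  at::Tensor padded = at::bincount(labels, {}, /*minlength=*/6);  // length >= 6
}
```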
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..410ba97addc361f48364faef9bf8a941cb873adf
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor bitwise_and(const at::Tensor & self, const at::Tensor & other);
+TORCH_API at::Tensor & bitwise_and_(at::Tensor & self, const at::Tensor & other);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cholesky_inverse_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cholesky_inverse_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..86aa245954b91ed1837388a38d28806cd0723a31
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cholesky_inverse_cpu_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor cholesky_inverse(const at::Tensor & self, bool upper=false);
+TORCH_API at::Tensor & cholesky_inverse_out(at::Tensor & out, const at::Tensor & self, bool upper=false);
+TORCH_API at::Tensor & cholesky_inverse_outf(const at::Tensor & self, bool upper, at::Tensor & out);
+
+} // namespace cpu
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv1d_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv1d_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..6390af704edb99cfdf333ca5166c8d9081870e95
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv1d_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor conv1d_symint(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::SymIntArrayRef padding=c10::SymInt(0), c10::SymIntArrayRef dilation=c10::SymInt(1), c10::SymInt groups=1);
+TORCH_API at::Tensor conv1d_padding_symint(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::string_view padding="valid", c10::SymIntArrayRef dilation=c10::SymInt(1), c10::SymInt groups=1);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv_transpose3d_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv_transpose3d_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b98c4b61e7347580b7c4e722af53cc2282fdd9f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/conv_transpose3d_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor conv_transpose3d_symint(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias={}, c10::SymIntArrayRef stride=c10::SymInt(1), c10::SymIntArrayRef padding=c10::SymInt(0), c10::SymIntArrayRef output_padding=c10::SymInt(0), c10::SymInt groups=1, c10::SymIntArrayRef dilation=c10::SymInt(1));
+} // namespace native
+} // namespace at
aweights=None) -> Tensor +inline at::Tensor cov(const at::Tensor & self, int64_t correction=1, const c10::optional<at::Tensor> & fweights={}, const c10::optional<at::Tensor> & aweights={}) { + return at::_ops::cov::call(self, correction, fweights, aweights); +} + +}
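For reference, a minimal sketch of a call site for the generated at::cov wrapper above, assuming a standard libtorch setup; the shapes, values, and the cov_example name are illustrative, not part of this diff:

#include <ATen/ATen.h>

void cov_example() {
  // Rows are variables, columns are observations (torch.cov convention).
  at::Tensor obs = at::rand({4, 10});
  // Defaults mirror the schema: correction=1, fweights/aweights unset.
  at::Tensor sample_cov = at::cov(obs);
  // Population covariance via the same wrapper, correction=0.
  at::Tensor population_cov = at::cov(obs, /*correction=*/0);
}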
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_batch_norm_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_batch_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ba8000d5154050959c0bfa3a44ed9bf00ec09e2d --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_batch_norm_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor> cudnn_batch_norm(const at::Tensor & input, const at::Tensor & weight, const c10::optional<at::Tensor> & bias, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, bool training, double exponential_average_factor, double epsilon); + +} // namespace cuda +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose_native.h new file mode 100644 index 0000000000000000000000000000000000000000..55bfe1d981b1d276686d0d19be784437cc0c9458 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_convolution_transpose_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> + + +namespace at { +namespace native { +TORCH_API at::Tensor & cudnn_convolution_transpose_out_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, at::Tensor & out); +TORCH_API at::Tensor cudnn_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/expand_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/expand_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4a0b82a7fa315eb6e8bba6fea18731d1c7bc590b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/expand_ops.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API expand { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::expand") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)") + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); +}; + +}} // namespace at::_ops
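Each *_ops.h header generated from Operator.h follows the pattern visible in expand above: one struct per overload, with a static call that enters the dispatcher from the top and a static redispatch that takes an explicit c10::DispatchKeySet for re-entry below the current key. User code normally reaches these through the Tensor method or at:: wrapper; a hedged sketch of the equivalence (expand_example is our own name):

#include <ATen/ATen.h>
#include <vector>

void expand_example() {
  at::Tensor t = at::ones({1, 3});
  // Ordinary surface API:
  at::Tensor a = t.expand({4, 3});
  // What the generated wrappers ultimately invoke:
  std::vector<c10::SymInt> size = {4, 3};
  at::Tensor b = at::_ops::expand::call(t, size, /*implicit=*/false);
}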
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn.h new file mode 100644 index 0000000000000000000000000000000000000000..252d08594b292076c31efc6e86e90637f47b8132 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn.h @@ -0,0 +1,91 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include <ATen/Context.h> +#include <ATen/DeviceGuard.h> +#include <ATen/TensorUtils.h> +#include <ATen/TracerMode.h> +#include <ATen/core/Generator.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> + + + +#include <ATen/ops/fft_hfftn_ops.h> + +namespace at { + + +// aten::fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor +inline at::Tensor fft_hfftn(const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>> + at::Tensor fft_hfftn(const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm); + } +} + +// aten::fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor +inline at::Tensor fft_hfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn::call(self, s, dim, norm); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>> + at::Tensor fft_hfftn(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn::call(self, s, dim, norm); + } +} + +// aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline const at::Tensor & fft_hfftn_out(const at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>> + const at::Tensor & fft_hfftn_out(const at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out); + } +} + +// aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline const at::Tensor & fft_hfftn_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out) { + return at::_ops::fft_hfftn_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>> + const at::Tensor & fft_hfftn_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out) { + return at::_ops::fft_hfftn_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out); + } +} + +// aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline const at::Tensor & fft_hfftn_symint_out(const at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn_out::call(self, s, dim, norm, out); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>> + const at::Tensor & fft_hfftn_out(const at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::OptionalIntArrayRef dim=c10::nullopt, c10::optional<c10::string_view> norm=c10::nullopt) { + return at::_ops::fft_hfftn_out::call(self, s, dim, norm, out); + } +} + +// aten::fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!) +inline const at::Tensor & fft_hfftn_symint_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out) { + return at::_ops::fft_hfftn_out::call(self, s, dim, norm, out); +} +namespace symint { + template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>> + const at::Tensor & fft_hfftn_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out) { + return at::_ops::fft_hfftn_out::call(self, s, dim, norm, out); + } +} + +}
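The header above also shows how the int64_t overloads forward to the SymInt schema by converting s through c10::fromIntArrayRefSlow. A small usage sketch of the public spellings, assuming an FFT-enabled libtorch build; shapes and the hfftn_example name are illustrative:

#include <ATen/ATen.h>
#include <vector>

void hfftn_example() {
  at::Tensor x = at::randn({4, 5}, at::kComplexFloat);
  // Functional form; s and dim default to "all dimensions".
  at::Tensor y = at::fft_hfftn(x);
  // Out-variant; note the generated signature takes `const Tensor& out`.
  std::vector<int64_t> s = {4, 8}, dim = {0, 1};
  at::Tensor out = at::empty({4, 8});
  at::fft_hfftn_out(out, x, s, dim, /*norm=*/"ortho");
}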
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c3af7d1c70d7680271a1ed77fc9b0e0af6f52ced --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_hfftn_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API fft_hfftn { + using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, at::OptionalIntArrayRef, c10::optional<c10::string_view>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::fft_hfftn") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor") + static at::Tensor call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm); +}; + +struct TORCH_API fft_hfftn_out { + using schema = const at::Tensor & (const at::Tensor &, at::OptionalSymIntArrayRef, at::OptionalIntArrayRef, c10::optional<c10::string_view>, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::fft_hfftn") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)") + static const at::Tensor & call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out); + static const at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, c10::optional<c10::string_view> norm, const at::Tensor & out); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fe10f78d34355d617c659ddce3acc4fb8377d0d1 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cuda_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace cuda +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ff4abff062f07b79ffb494a6bbe740126a26e72b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/gelu_backward_native.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> +#include <ATen/ops/gelu_backward_meta.h> + +namespace at { +namespace native { +struct TORCH_API structured_gelu_backward_out_cpu : public at::meta::structured_gelu_backward { +void impl(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate, const at::Tensor & grad_input); +}; +struct TORCH_API structured_gelu_backward_out_cuda : public at::meta::structured_gelu_backward { +void impl(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate, const at::Tensor & grad_input); +}; +TORCH_API at::Tensor gelu_backwards_nested(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate="none"); +TORCH_API at::Tensor mkldnn_gelu_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate="none"); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a454a6da1ee244b5bedbaccdca79d05e7e36411d --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/glu_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor glu(const at::Tensor & self, int64_t dim=-1); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hstack_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hstack_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bb99b3f3e9a590d5130d2c0de929ae25baed0bf8 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/hstack_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> + + +namespace at { +namespace native { +TORCH_API at::Tensor hstack(at::TensorList tensors); +TORCH_API at::Tensor & hstack_out(at::TensorList tensors, at::Tensor & out); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isneginf_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isneginf_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ee95465e0ff7d2100d0c4ee50262e48127785e7c --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isneginf_cpu_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor isneginf(const at::Tensor & self); +TORCH_API at::Tensor & isneginf_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & isneginf_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/less.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/less.h new file mode 100644 index 0000000000000000000000000000000000000000..25775e2f016e90c7de6dc568cd3f3790b9f72192 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/less.h @@ -0,0 +1,53 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include <ATen/Context.h> +#include <ATen/DeviceGuard.h> +#include <ATen/TensorUtils.h> +#include <ATen/TracerMode.h> +#include <ATen/core/Generator.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> + + + +#include <ATen/ops/less_ops.h> + +namespace at { + + +// aten::less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & less_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) { + return at::_ops::less_Scalar_out::call(self, other, out); +} +// aten::less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & less_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) { + return at::_ops::less_Scalar_out::call(self, other, out); +} + +// aten::less.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor less(const at::Tensor & self, const at::Scalar & other) { + return at::_ops::less_Scalar::call(self, other); +} + +// aten::less.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & less_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::less_Tensor_out::call(self, other, out); +} +// aten::less.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & less_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::less_Tensor_out::call(self, other, out); +} + +// aten::less.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor less(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::less_Tensor::call(self, other); +} + +}
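less and its out-variants are the NumPy-compatibility aliases of lt/le; the generated wrappers above simply forward to the corresponding at::_ops structs. A hedged sketch (values and the less_example name are illustrative):

#include <ATen/ATen.h>

void less_example() {
  at::Tensor a = at::tensor({1, 2, 3});
  at::Tensor b = at::tensor({2, 2, 2});
  at::Tensor m1 = at::less(a, b);   // elementwise a < b -> {true, false, false}
  at::Tensor m2 = at::less(a, 2);   // Scalar overload
  at::Tensor out = at::empty({3}, at::kBool);
  at::less_out(out, a, b);          // out-variant; result dtype is bool
}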
out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +}; + +struct TORCH_API less_equal_Scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::less_equal") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Scalar") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal.Scalar(Tensor self, Scalar other) -> Tensor") + static at::Tensor call(const at::Tensor & self, const at::Scalar & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other); +}; + +struct TORCH_API less_equal_Tensor_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::less_equal") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor_out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API less_equal_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::less_equal") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal.Tensor(Tensor self, Tensor other) -> Tensor") + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API less_equal__Scalar { + using schema = at::Tensor & (at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::less_equal_") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Scalar") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)") + static at::Tensor & call(at::Tensor & self, const at::Scalar & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & other); +}; + +struct TORCH_API less_equal__Tensor { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::less_equal_") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal_.Tensor(Tensor(a!) 
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)") + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8ef384282b37fc358c66b289825fa229a0133d2d --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_matmul_compositeimplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor linalg_matmul(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & linalg_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & linalg_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4a9c7783b58ab10d868a3e81a771bb580a219b43 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_qr_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> +#include <ATen/ops/linalg_qr_meta.h> + +namespace at { +namespace native { +struct TORCH_API structured_linalg_qr_out : public at::meta::structured_linalg_qr { +void impl(const at::Tensor & A, c10::string_view mode, const at::Tensor & Q, const at::Tensor & R); +}; +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svdvals.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svdvals.h new file mode 100644 index 0000000000000000000000000000000000000000..f2e7e81d78d3f6acf83817c19aa37fc29002a866 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_svdvals.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include <ATen/Context.h> +#include <ATen/DeviceGuard.h> +#include <ATen/TensorUtils.h> +#include <ATen/TracerMode.h> +#include <ATen/core/Generator.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> + + + +#include <ATen/ops/linalg_svdvals_ops.h> + +namespace at { +
+// aten::linalg_svdvals(Tensor A, *, str? driver=None) -> Tensor +inline at::Tensor linalg_svdvals(const at::Tensor & A, c10::optional<c10::string_view> driver=c10::nullopt) { + return at::_ops::linalg_svdvals::call(A, driver); +} + +// aten::linalg_svdvals.out(Tensor A, *, str? driver=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_svdvals_out(at::Tensor & out, const at::Tensor & A, c10::optional<c10::string_view> driver=c10::nullopt) { + return at::_ops::linalg_svdvals_out::call(A, driver, out); +} +// aten::linalg_svdvals.out(Tensor A, *, str? driver=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_svdvals_outf(const at::Tensor & A, c10::optional<c10::string_view> driver, at::Tensor & out) { + return at::_ops::linalg_svdvals_out::call(A, driver, out); +} + +}
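Besides the input matrix, linalg_svdvals only exposes the optional driver knob. A minimal sketch of a call site, assuming a standard libtorch setup (the svdvals_example name is ours; the driver string only has an effect, and is only accepted, for CUDA inputs):

#include <ATen/ATen.h>

void svdvals_example() {
  at::Tensor A = at::randn({5, 3});
  at::Tensor s = at::linalg_svdvals(A);  // driver defaults to c10::nullopt
  // On CUDA tensors a cuSOLVER driver may be requested explicitly, e.g.:
  // at::Tensor s2 = at::linalg_svdvals(A.cuda(), "gesvdj");
}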
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_tensorsolve_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_tensorsolve_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ee6380b4d02d9c9e67634efa52acb590dc94d3fb --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_tensorsolve_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> + + +namespace at { +namespace native { +TORCH_API at::Tensor linalg_tensorsolve(const at::Tensor & self, const at::Tensor & other, at::OptionalIntArrayRef dims=c10::nullopt); +TORCH_API at::Tensor & linalg_tensorsolve_out(const at::Tensor & self, const at::Tensor & other, at::OptionalIntArrayRef dims, at::Tensor & out); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a3c1454dffac13453fde6f5e1b8a33e464301266 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_vector_norm_cuda_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor linalg_vector_norm(const at::Tensor & self, const at::Scalar & ord=2, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt); +TORCH_API at::Tensor & linalg_vector_norm_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & ord=2, at::OptionalIntArrayRef dim=c10::nullopt, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt); +TORCH_API at::Tensor & linalg_vector_norm_outf(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype, at::Tensor & out); + +} // namespace cuda +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logical_xor_cpu_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logical_xor_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..94cd5b7588adbf37f7465c8a7867df5121f453ea --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logical_xor_cpu_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor & logical_xor_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & logical_xor_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace cpu +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lshift_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lshift_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9291e3eb2f4295d1cb6cea65d416ca6df50bf5cb --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/lshift_native.h @@ -0,0 +1,26 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> + + +namespace at { +namespace native { +TORCH_API at::Tensor & __lshift___Scalar_out(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor __lshift__(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & __ilshift__(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & __lshift___Tensor_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor __lshift__(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & __ilshift__(at::Tensor & self, const at::Tensor & other); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..49b791d699a2e0072140a47901025a76f22aab91 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include <ATen/Context.h> +#include <ATen/DeviceGuard.h> +#include <ATen/TensorUtils.h> +#include <ATen/TracerMode.h> +#include <ATen/core/Generator.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> + + + +#include <ATen/ops/max_pool2d_with_indices_ops.h> + +namespace at { + + +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple<at::Tensor &,at::Tensor &> max_pool2d_with_indices_out(at::Tensor & out, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple<at::Tensor &,at::Tensor &> max_pool2d_with_indices_outf(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} + +// aten::max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) +inline ::std::tuple<at::Tensor,at::Tensor> max_pool2d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode); +} + +}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool3d.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool3d.h new file mode 100644 index 0000000000000000000000000000000000000000..c2749f33289af24060ff34e8f178f885e3b2ee2f --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool3d.h @@ -0,0 +1,30 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include <ATen/Context.h> +#include <ATen/DeviceGuard.h> +#include <ATen/TensorUtils.h> +#include <ATen/TracerMode.h> +#include <ATen/core/Generator.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> + + + +#include <ATen/ops/max_pool3d_ops.h> + +namespace at { + + +// aten::max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor +inline at::Tensor max_pool3d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool3d::call(self, kernel_size, stride, padding, dilation, ceil_mode); +} + +}
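max_pool2d_with_indices returns both the pooled output and the argmax indices (the latter feed max_unpool2d and the backward pass), while max_pool3d is the plain functional form. A hedged sketch, with illustrative shapes and our own pool_example name:

#include <ATen/ATen.h>

void pool_example() {
  at::Tensor x = at::randn({1, 3, 8, 8});  // NCHW
  // stride defaults to kernel_size when left empty ({}).
  auto [out, idx] = at::max_pool2d_with_indices(x, /*kernel_size=*/{2, 2});
  at::Tensor y = at::randn({1, 2, 8, 8, 8});
  at::Tensor p3 = at::max_pool3d(y, /*kernel_size=*/{2, 2, 2});
}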
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a1399eaa5575a1a830d7a1d493a03595302cb697 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API mkldnn_reorder_conv2d_weight { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::mkldnn_reorder_conv2d_weight") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor") + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); +}; + +struct TORCH_API mkldnn_reorder_conv2d_weight_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::mkldnn_reorder_conv2d_weight") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multilabel_margin_loss_backward_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multilabel_margin_loss_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6cc719c973ec0d2d3e9a38e95f79548da8caef34 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multilabel_margin_loss_backward_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API multilabel_margin_loss_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multilabel_margin_loss_backward") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) 
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, const at::Tensor & is_target, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, const at::Tensor & is_target, at::Tensor & grad_input); +}; + +struct TORCH_API multilabel_margin_loss_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multilabel_margin_loss_backward") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multilabel_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target) -> Tensor") + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, const at::Tensor & is_target); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, const at::Tensor & is_target); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multiply_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multiply_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a7c3737e20c0daba70dd8f2375d55205c6bd7275 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/multiply_ops.h @@ -0,0 +1,72 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API multiply_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multiply") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multiply.Tensor(Tensor self, Tensor other) -> Tensor") + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API multiply__Tensor { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multiply_") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Tensor")
+ STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)") + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API multiply_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multiply") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multiply.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API multiply_Scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multiply") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Scalar") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multiply.Scalar(Tensor self, Scalar other) -> Tensor") + static at::Tensor call(const at::Tensor & self, const at::Scalar & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other); +}; + +struct TORCH_API multiply__Scalar { + using schema = at::Tensor & (at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::multiply_") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "Scalar") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)") + static at::Tensor & call(at::Tensor & self, const at::Scalar & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & other); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/native_group_norm_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/native_group_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..10aa0a63b38604ecb3adbbbed22f39e80f1a4f2b --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/native_group_norm_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API native_group_norm { + using schema = ::std::tuple<at::Tensor,at::Tensor,at::Tensor> (const at::Tensor &, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &, c10::SymInt, c10::SymInt, c10::SymInt, int64_t, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::native_group_norm") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor)") + static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> call(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); + static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps); +}; + +struct TORCH_API native_group_norm_out { + using schema = ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> (const at::Tensor &, const c10::optional<at::Tensor> &, const c10::optional<at::Tensor> &, c10::SymInt, c10::SymInt, c10::SymInt, int64_t, double, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::native_group_norm") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "native_group_norm.out(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))") + static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> call(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, double eps, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/promote_types_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/promote_types_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a499c5e891e9b4da722a4a75047b11846bbac773 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/promote_types_ops.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h> + +namespace at { +namespace _ops { + + +struct TORCH_API promote_types { + using schema = at::ScalarType (at::ScalarType, at::ScalarType); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::promote_types") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "promote_types(ScalarType type1, ScalarType type2) -> ScalarType") + static at::ScalarType call(at::ScalarType type1, at::ScalarType type2); + static at::ScalarType redispatch(c10::DispatchKeySet dispatchKeySet, at::ScalarType type1, at::ScalarType type2); +}; + +}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/quantize_per_channel_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/quantize_per_channel_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ec489fbcaff32166901bc3c88a7ed71c19f82d8a --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/quantize_per_channel_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor quantize_per_channel(const at::Tensor & self, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::ScalarType dtype); + +} // namespace cuda +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rand_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rand_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7330c5fe351fb660425366c5b95c58cb1f9bae31 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rand_compositeimplicitautograd_dispatch.h @@ -0,0 +1,26 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include <c10/core/MemoryFormat.h> +#include <c10/core/Scalar.h> +#include <ATen/core/Reduction.h> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h> + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor & rand_out(at::Tensor & out, at::IntArrayRef size, c10::optional<at::Generator> generator); +TORCH_API at::Tensor & rand_outf(at::IntArrayRef size, c10::optional<at::Generator> generator, at::Tensor & out); +TORCH_API at::Tensor & rand_symint_out(at::Tensor & out, c10::SymIntArrayRef size, c10::optional<at::Generator> generator); +TORCH_API at::Tensor & rand_symint_outf(c10::SymIntArrayRef size, c10::optional<at::Generator> generator, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/randint_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/randint_native.h new file mode 100644 index 0000000000000000000000000000000000000000..52c20df898aab67161501140921575b51f438d44 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/randint_native.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include <c10/core/Scalar.h> +#include <c10/core/Storage.h> +#include <c10/core/TensorOptions.h> +#include <c10/util/Deprecated.h> +#include <c10/util/Optional.h> +#include <c10/core/QScheme.h> +#include <ATen/core/Reduction.h> +#include <ATen/core/Tensor.h> +#include <tuple> +#include <vector> + + +namespace at { +namespace native { +TORCH_API at::Tensor randint(int64_t high, at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}); +TORCH_API at::Tensor & randint_out(int64_t high, at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor randint(int64_t high, at::IntArrayRef size, c10::optional<at::Generator> generator, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}); +TORCH_API at::Tensor & randint_out(int64_t high, at::IntArrayRef size, c10::optional<at::Generator> generator, at::Tensor & out); +TORCH_API at::Tensor randint(int64_t low, int64_t high, at::IntArrayRef size, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}); +TORCH_API at::Tensor & randint_out(int64_t low, int64_t high, at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor randint(int64_t low, int64_t high, at::IntArrayRef size, c10::optional<at::Generator> generator, c10::optional<at::ScalarType> dtype={}, c10::optional<at::Layout> layout={}, c10::optional<at::Device> device={}, c10::optional<bool> pin_memory={}); +TORCH_API at::Tensor & randint_out(int64_t low, int64_t high, at::IntArrayRef size, c10::optional<at::Generator> generator, at::Tensor & out); +} // namespace native +} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..32399bd67141f716b05d26dfa46e036f35c00bc2 --- /dev/null +++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include <tuple> +#include <vector> + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..32399bd67141f716b05d26dfa46e036f35c00bc2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/ravel_ops.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API ravel {
+  using schema = at::Tensor (const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::ravel")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "ravel(Tensor(a) self) -> Tensor(a)")
+  static at::Tensor call(const at::Tensor & self);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/refine_names_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/refine_names_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..6de924f2068befcfc9115189dfc0de17beb1c95b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/refine_names_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor refine_names(const at::Tensor & self, at::DimnameList names);
+
+} // namespace compositeimplicitautograd
+} // namespace at
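The "Tensor(a) self) -> Tensor(a)" schema above marks ravel as alias-propagating: when the input is already contiguous, the result may share storage with it. A minimal sketch (names invented):

#include <ATen/ATen.h>

void demo_ravel() {
  at::Tensor m = at::arange(6).reshape({2, 3});
  at::Tensor flat = m.ravel();  // shape [6]; a view here, since m is contiguous
}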
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/repeat_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/repeat_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf4034a032c3516b1685c1f987194d17f155b245
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/repeat_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor repeat(const at::Tensor & self, at::IntArrayRef repeats);
+TORCH_API at::Tensor repeat_symint(const at::Tensor & self, c10::SymIntArrayRef repeats);
+TORCH_API at::Tensor & repeat_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef repeats);
+TORCH_API at::Tensor & repeat_outf(const at::Tensor & self, at::IntArrayRef repeats, at::Tensor & out);
+TORCH_API at::Tensor & repeat_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef repeats);
+TORCH_API at::Tensor & repeat_symint_outf(const at::Tensor & self, c10::SymIntArrayRef repeats, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu.h
new file mode 100644
index 0000000000000000000000000000000000000000..a69d6aca8b3821470195e63853ed8dfbb04f570a
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/rnn_relu.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/rnn_relu_ops.h>
+
+namespace at {
+
+
+// aten::rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> rnn_relu(const at::Tensor & input, const at::Tensor & hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional, bool batch_first) {
+    return at::_ops::rnn_relu_input::call(input, hx, params, has_biases, num_layers, dropout, train, bidirectional, batch_first);
+}
+
+// aten::rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> rnn_relu(const at::Tensor & data, const at::Tensor & batch_sizes, const at::Tensor & hx, at::TensorList params, bool has_biases, int64_t num_layers, double dropout, bool train, bool bidirectional) {
+    return at::_ops::rnn_relu_data::call(data, batch_sizes, hx, params, has_biases, num_layers, dropout, train, bidirectional);
+}
+
+}
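The two rnn_relu overloads mirror torch's padded-input and packed-sequence RNN entry points. A rough sketch of the padded-input form, assuming a single unidirectional layer whose flat weights follow the usual {w_ih, w_hh, b_ih, b_hh} layout (all names and shapes below are illustrative):

#include <ATen/ATen.h>
#include <vector>

void demo_rnn_relu() {
  int64_t seq = 5, batch = 2, in = 4, hidden = 3;
  at::Tensor input = at::randn({seq, batch, in});
  at::Tensor hx = at::zeros({1, batch, hidden});
  std::vector<at::Tensor> params = {
      at::randn({hidden, in}), at::randn({hidden, hidden}),
      at::randn({hidden}), at::randn({hidden})};
  auto [output, hy] = at::rnn_relu(input, hx, params, /*has_biases=*/true,
                                   /*num_layers=*/1, /*dropout=*/0.0,
                                   /*train=*/false, /*bidirectional=*/false,
                                   /*batch_first=*/false);
}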
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/signbit_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/signbit_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..1c754fdf4d392204fd3b5da147ada2407b2d2846
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/signbit_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor signbit(const at::Tensor & self);
+TORCH_API at::Tensor & signbit_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & signbit_outf(const at::Tensor & self, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_erfinv_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_erfinv_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7e2983dd345e6837f5d82a5b626491e7f40e7b9
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_erfinv_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API special_erfinv {
+  using schema = at::Tensor (const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_erfinv")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_erfinv(Tensor self) -> Tensor")
+  static at::Tensor call(const at::Tensor & self);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
+};
+
+struct TORCH_API special_erfinv_out {
+  using schema = at::Tensor & (const at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_erfinv")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out);
+};
+
+}} // namespace at::_ops
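Note the generated naming convention visible in the signbit header: `signbit_out(out, self)` takes the out tensor first, while `signbit_outf(self, out)` takes it last; both bind the same aten::signbit.out overload. A small sketch (invented names):

#include <ATen/ATen.h>

void demo_signbit() {
  at::Tensor x = at::tensor({-2.0f, 0.0f, 3.0f});
  at::Tensor out = at::empty({3}, at::kBool);
  at::signbit_out(out, x);        // out-first form
  at::Tensor y = at::signbit(x);  // allocating form: {true, false, false}
}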
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_gammainc.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_gammainc.h
new file mode 100644
index 0000000000000000000000000000000000000000..03c4847a493335b071a5a5462a1ebeb8c5656a2f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_gammainc.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/special_gammainc_ops.h>
+
+namespace at {
+
+
+// aten::special_gammainc.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & special_gammainc_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::special_gammainc_out::call(self, other, out);
+}
+// aten::special_gammainc.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & special_gammainc_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
+    return at::_ops::special_gammainc_out::call(self, other, out);
+}
+
+// aten::special_gammainc(Tensor self, Tensor other) -> Tensor
+inline at::Tensor special_gammainc(const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::special_gammainc::call(self, other);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_i1e_cuda_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_i1e_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..012bace7d98803a68e3ed53fd099e474bf596271
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_i1e_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor special_i1e(const at::Tensor & self);
+TORCH_API at::Tensor & special_i1e_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & special_i1e_outf(const at::Tensor & self, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_log_softmax_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_log_softmax_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..adaa4d299c741a257db5f9f11d4336c31b9a0dae
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/special_log_softmax_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor special_log_softmax(const at::Tensor & self, int64_t dim, c10::optional<at::ScalarType> dtype=c10::nullopt);
+} // namespace native
+} // namespace at
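As the inline wrappers above show, the functional form simply forwards to _ops::special_gammainc::call. A minimal sketch of the regularized lower incomplete gamma function P(a, x), with invented names; P(1, x) = 1 - exp(-x), so the first entry is about 0.6321:

#include <ATen/ATen.h>

void demo_gammainc() {
  at::Tensor a = at::tensor({1.0, 2.0});
  at::Tensor x = at::tensor({1.0, 1.0});
  at::Tensor p = at::special_gammainc(a, x);  // elementwise P(a, x)
}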
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_with_sizes_compositeexplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_with_sizes_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a89b80d7d030d2e10abebac15462bb8d9d786f0e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/split_with_sizes_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API ::std::vector<at::Tensor> split_with_sizes(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim=0);
+TORCH_API ::std::vector<at::Tensor> split_with_sizes_symint(const at::Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim=0);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/stack.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/stack.h
new file mode 100644
index 0000000000000000000000000000000000000000..ec73578c44422ed514826f7e99dd462be0468bda
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/stack.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+
+
+
+#include <ATen/ops/stack_ops.h>
+
+namespace at {
+
+
+// aten::stack(Tensor[] tensors, int dim=0) -> Tensor
+inline at::Tensor stack(at::TensorList tensors, int64_t dim=0) {
+    return at::_ops::stack::call(tensors, dim);
+}
+
+// aten::stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & stack_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0) {
+    return at::_ops::stack_out::call(tensors, dim, out);
+}
+// aten::stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & stack_outf(at::TensorList tensors, int64_t dim, at::Tensor & out) {
+    return at::_ops::stack_out::call(tensors, dim, out);
+}
+
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sum_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sum_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d3a8f1f4e910bdfb38bb9c821d348da87ec5cd2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/sum_native.h
@@ -0,0 +1,32 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/sum_meta.h>
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor sum(const at::Tensor & self, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor & sum_out(const at::Tensor & self, c10::optional<at::ScalarType> dtype, at::Tensor & out);
+TORCH_API at::Tensor sum_coo(const at::Tensor & self, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor sum_csr(const at::Tensor & self, c10::optional<at::ScalarType> dtype=c10::nullopt);
+struct TORCH_API structured_sum_out : public at::meta::structured_sum_dim_IntList {
+void impl(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, c10::optional<at::ScalarType> dtype, const at::Tensor & out);
+};
+TORCH_API at::Tensor NestedTensor_sum_dim_CPU(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor sum_sparse_coo(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor sum_sparse_compressed(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor sum(const at::Tensor & self, at::DimnameList dim, bool keepdim=false, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor & sum_out(const at::Tensor & self, at::DimnameList dim, bool keepdim, c10::optional<at::ScalarType> dtype, at::Tensor & out);
+} // namespace native
+} // namespace at
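A short sketch tying these three files together: stack concatenates along a new leading dimension, sum reduces with an optional accumulation dtype, and split_with_sizes returns a vector of views (names below are illustrative):

#include <ATen/ATen.h>
#include <vector>

void demo_stack_sum_split() {
  at::Tensor a = at::ones({2, 3});
  at::Tensor b = at::zeros({2, 3});
  at::Tensor s = at::stack({a, b}, /*dim=*/0);   // shape [2, 2, 3]
  at::Tensor total = at::sum(s, at::kDouble);    // 0-dim tensor holding 6.0
  std::vector<at::Tensor> parts = at::split_with_sizes(s, {1, 1}, /*dim=*/0);
}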
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/swapdims_compositeimplicitautograd_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/swapdims_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f28095cff8865335712fdbfcdd4c06dfc57e5543
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/swapdims_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor swapdims(const at::Tensor & self, int64_t dim0, int64_t dim1);
+TORCH_API at::Tensor & swapdims_(at::Tensor & self, int64_t dim0, int64_t dim1);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/take_along_dim_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/take_along_dim_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..25a0650ed30041a9f3f9ff98d5223f07c5d2b9eb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/take_along_dim_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor take_along_dim(const at::Tensor & self, const at::Tensor & indices, c10::optional<int64_t> dim=c10::nullopt);
+TORCH_API at::Tensor & take_along_dim_out(const at::Tensor & self, const at::Tensor & indices, c10::optional<int64_t> dim, at::Tensor & out);
+} // namespace native
+} // namespace at
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/to_sparse_bsr_native.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/to_sparse_bsr_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..7bac6164f04d04ca4afbd1cf15b4efee5c89c310
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/to_sparse_bsr_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, c10::optional<int64_t> dense_dim=c10::nullopt);
+} // namespace native
+} // namespace at
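These three entry points compose naturally; a hedged sketch, with invented names, noting that take_along_dim expects index and input to have the same number of dimensions and that to_sparse_bsr requires the block size to evenly divide the matrix dimensions:

#include <ATen/ATen.h>

void demo_gather_and_bsr() {
  at::Tensor x = at::randn({4, 4});
  at::Tensor xt = at::swapdims(x, 0, 1);                    // transpose view
  at::Tensor idx = at::argmax(x, /*dim=*/1, /*keepdim=*/true);
  at::Tensor best = at::take_along_dim(x, idx, /*dim=*/1);  // per-row maxima
  at::Tensor bsr = x.to_sparse_bsr({2, 2});                 // 2x2 block CSR layout
}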
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_linear1d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_linear1d_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..67d52a05eb35e18b25a9c6eef7d4fa2dfb281c96
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_linear1d_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor upsample_linear1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, c10::optional<double> scales=c10::nullopt);
+TORCH_API at::Tensor upsample_linear1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, c10::optional<double> scales=c10::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
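These backward kernels are normally reached through autograd rather than called directly, but the shape contract is easy to show: output_size is the upsampled length, input_size the full (N, C, L_in) size. A hedged sketch with invented names:

#include <ATen/ATen.h>

void demo_upsample_backward() {
  at::Tensor grad_out = at::randn({1, 1, 8});  // (N, C, L_out)
  at::Tensor grad_in = at::upsample_linear1d_backward(
      grad_out, /*output_size=*/{8}, /*input_size=*/{1, 1, 4},
      /*align_corners=*/false);                // -> shape (1, 1, 4)
}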
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_nearest3d_backward_ops.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_nearest3d_backward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..922d5a9c38148f66237c7bb56e6d267357cf7ed0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/upsample_nearest3d_backward_ops.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API upsample_nearest3d_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::optional<double>, c10::optional<double>, c10::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::upsample_nearest3d_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "upsample_nearest3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, at::Tensor & grad_input);
+};
+
+struct TORCH_API upsample_nearest3d_backward {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::optional<double>, c10::optional<double>, c10::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::upsample_nearest3d_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w);
+};
+
+}} // namespace at::_ops
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/parallel/__pycache__/distributed.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/parallel/__pycache__/distributed.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6e38a5be1766eeb2696cc3da525350ab2405ad6d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/parallel/__pycache__/distributed.cpython-311.pyc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8d8cd47a1a5a9e6d99decc705d3030ffd28c96e29f82ac08767d9e05e7a8c57
+size 111297
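A closing note on the _ops structs that recur throughout these headers: `call` performs a full dispatcher round trip, while `redispatch` re-enters dispatch below the keys already handled (used internally by layers such as autograd, not by end users). A hedged sketch of the public wrapper this last header backs, with invented names and shapes:

#include <ATen/ATen.h>

void demo_nearest3d_backward() {
  at::Tensor grad_out = at::randn({1, 1, 2, 4, 4});  // (N, C, D_out, H_out, W_out)
  // The at:: wrapper forwards to at::_ops::upsample_nearest3d_backward::call.
  at::Tensor grad_in = at::upsample_nearest3d_backward(
      grad_out, /*output_size=*/{2, 4, 4}, /*input_size=*/{1, 1, 1, 2, 2});
}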